---
title: "'Something for Nothing' in Housing Politics: Preanalysis Plan"
author: "Chris Elmendorf, Clayton Nall, Stan Oklobdzija"
date: last-modified
date_format: long
format:
  pdf:
    keep-tex: true
    fig_caption: yes
    latex_engine: tinytex
    extra_dependencies: ["flafter"]
link-citations: true
linkcolor: blue
subparagraph: yes
citecolor: blue
urlcolor: blue
header-includes:
  - \counterwithin{figure}{section}
  - \setlength\parindent{24pt}
bibliography: allneo.bib
---

```{r}
#| label: setup
#| include: false

#Set global figure size to be bigger
knitr::opts_chunk$set(fig.width=12, fig.height=8) 
library(tidyverse)
library(here)
library(tidymodels)
library(DeclareDesign)
library(modelsummary)
library(BradleyTerry2)
library(lme4)
library(MASS)
library(clusterGeneration)
library(kableExtra)
library(patchwork)
library(boot) 
library(irr)
library(psych)
# library(gmodels)
# library(gtools)
library(readxl)
library(quanteda)
library(quanteda.textplots)
library(quanteda.textstats)
# library(naniar)
# library(stm)
# library(janitor)
# library(GGally)
 library(ggrepel)
# library(dotwhisker)
#library(randomForest)
# library(gridExtra)
 library(ggcorrplot)
# library(grid)
# library(ggpubr)
# library(sandwich)
# library(lmtest)
# library(ggeffects)
# library(zipcodeR)
# library(kableExtra)
library(gt)
 library(gtsummary)
# library(domir)
# library(quanteda)
# library(quanteda.textplots)
# library(quanteda.textstats)
# library(fixest) # packages fixest:dqrng are dependencies for wildrwolf
# library(fabricatr)
# library(JuliaConnectoR)
# library(collapse)
# library(dqrng)
# library(wildrwolf)
# library(hdm)
# library(NRejections)
# library(boot)
# library(RStata)
# library(EloChoice)
# library(EloRating)
# library(lubridate)
# library(boot)
# library(tictoc)
 library(corrr)
#library(ggpattern)
# library(weights)
 library(foreach)
 library(doParallel)
 library(doSNOW)


options(dplyr.width=Inf)
# checkpoint(snapshot_date = "2022-02-28", R.version = "4.1.2")

# format graphics setup
slides <- TRUE # toggle for making slides
if(slides){
  axis_text <- 24
  main_text <- 30
  corr_lab_size <- 5
  graph_text <- 20
  geom_text_text <- 16
  geom_dot_text <- 0.5*geom_text_text
  wrap_size <- 40
} else {
  axis_text <- 12
  main_text <- 12
  corr_lab_size <- 5
  graph_text <- 18
  wrap_size <- 40
}

###SO Note:
##I had these graphics sizes for the SS3 PAP. I am putting them here in case they end up looking better in Quarto.
# ###Sizes
# axis_text <- 20
# main_text <- 20
# strip_text <- 10
# title_text <- 20
# graph_text <- 12
# wrap_size <- 30


# seeds and number of replicates for bootstraps
testing = FALSE # for simulated data
test_size <- 5000 # simulated sample size 
M <- if(testing == TRUE){999} else {9999}  
seed1 <- 123
seed2 <- 976

```

```{r}
#| label: read pilot survey data
#| include: false

filename <- "NEO - Supply Skepticism Experiment 3_May 11, 2023_14.01.csv"

col_names <- names(read_csv(here::here("data_raw", filename), n_max = 0))

D <- read_csv(here::here("data_raw", filename),  
                  col_names = col_names, 
                  col_types = cols(.default = col_character()),
                  skip=3)

D <- D %>% filter(!is.na(PID) & PID != "testPID") # drop fake & bench-test data (pre-launch), i.e., responses w/o a Forthright identifier

```

```{r }
#| label: recode pilot survey data
#| include: false

####Data is contained in env pilot

##Toggle testing mode.
testing <- F

# drop respondents who did not finish or got bumped at attention check stage.  
D <- D %>%
  filter(Finished == '1') %>%
  filter(Q2.1 == 1 & Q2.2 == "1,2")

if(testing==T){
  D <- D[sample(1:nrow(D), test_size, replace = TRUE),] # create N = test_size simulated sample
  D$ResponseId <- paste0("R", 1:nrow(D))   # new unique identifier (Qualtrics ID is replicated)
}
  
# drop embedded data fields used only in question text, and other vestigal and unnecessary stuff
D <- D %>%
  dplyr::select(- (RecipientLastName:ExternalReference), # Qualtrics unused field
         - UserLanguage, # Qualtrics unused field
         - test_API, # for testing
         - test_Q, # for testing
         - efficacy1_DO1, # used to encode display first item presented in first efficacy pair, for later retest
         - efficacy1_DO2, # used to encode display second item presented in first efficacy pair, for later retest
         - Q5.7, # used if packrat == TRUE
         - Q5.8, # used if packrat == TRUE  
         )
  
# Intro demographics section of survey
D<-D%>%
  mutate(
    age.cat=recode(Q5.1,'1'='18-29', '2'='30-44', 
                        '3'='45-64', '4'='65 plus'),
    male=as.numeric(Q5.2=='1')
  )

D$race.eth<-NA
D$race.eth[D$Q5.3=="1"]<-"White"
D$race.eth[D$Q5.3=="2"]<-"Black"
D$race.eth[D$Q5.3=="4"]<-"Asian"
## Order is important here; any Hispanic supersedes race.
D$race.eth[grep("3", D$Q5.3)]<-"Hispanic"
D$race.eth[D$Q5.3%in%c("5", "6")]<-"Multi/Other"
D$race.eth[is.na(D$race.eth)]<-"Multi/Other"

D <- D %>%
  mutate(
    tenure = Q5.6, # 1=own, 2=rent, 3=dependent, 4=other.  
    tenure = recode(tenure, 
                     '1' = 'Owner', 
                     '2' = 'Renter', 
                     '3' = 'Other',
                     '4' = 'Other',
                    ),
    ownhome = as.numeric(tenure == "Owner"), # encoding on SS2
    has.ba = as.numeric(Q5.5=='4'),
    want.price = recode(Q5.4, '1'='Higher', '2'='Same', '3'='Lower'),
    income = as.numeric(Q26.11), # not recoded, 11 point scale, `1` < $30,000, `11` > $150,000
    housing.costs = as.numeric(Q26.12), # not recoded, 14 point scale, `1` < $250/month, `14` > $20k/month
    cost.burden = housing.costs/income # very coarse measure of cost burden, dividing income on 11 point scale by (15 - housing expense on a 14 point scale))
) 

# Typical housing type as reported by respondent.
D <- D%>%
  mutate(
    hous.typ.rent = recode(Q6.1, '1'='Small_Apt', '2'='Large_Apt', '3'='Small_Townhome', '4'='Large_Townhome', '5'='Small_SFH', '6' = "Large_SFH"),
    hous.typ.own = recode(Q6.3, '1'='Small_Condo', '2'='Large_Condo', '3'='Small_Townhome', '4'='Large_Townhome', '5'='Small_SFH', '6' = "Large_SFH")
  )

# Create index of economic knowledge. Though $know.trade is a knowledge question we exclude it from the index and $know.tot b/c it is a question about effect of a politically salient policy, whereas the other questions are about non-political supply shocks. The supply shock variables are named $know.ss.x.

# in binary measures of knowledge 1 is correct and 0 is incorrect. In 3-point measures, 1 is directionally correct, - 1 is incorrect, and 0 is no change

D<-D%>%
  mutate(
    know.trade=as.numeric(Q15.1=='2'), # correct answer
    know.trade.3=as.numeric(recode(Q15.1, '1'='-1', '2'='1', '3' = '0')), # 1 = correct, -1 = incorrect, 0 = no change
    know.ss.used=as.numeric(Q16.1=='1'), # correct answer
    know.ss.used.3=as.numeric(recode(Q16.1, '1'='1', '2'='-1', '3' = '0')), # 1 = correct, -1 = incorrect, 0 = no change
    know.ss.grain=as.numeric(Q16.2=='2'), # correct answer
    know.ss.grain.3=as.numeric(recode(Q16.2, '1'='-1', '2'='1', '3' = '0')), # 1 = correct, -1 = incorrect, 0 = no change
    know.ss.wages=as.numeric(Q15.2 == '2'), # correct answer
    know.ss.wages.3=as.numeric(recode(Q15.2, '1'='-1', '2'='1', '3' = '0')), # 1 = correct, -1 = incorrect, 0 = no change
  )

know.ss.pc<-D%>%
 dplyr::select(matches("^know.ss.*3")) %>%
 drop_na() %>%
 prcomp()
pc.x<-as.data.frame(know.ss.pc$x)
names(pc.x)<-paste0("know.ss.", names(pc.x))

# assign ResponseId to know.PC.x and join to D
rep_id <- D %>%
  filter(if_all(starts_with("know.ss"), ~ !is.na(.x))) %>%
  dplyr::pull(ResponseId)
pc.x <- pc.x %>% mutate(ResponseId = rep_id) 

D <- left_join(D, pc.x, by="ResponseId")

D$know.ss.tot<-rowMeans(dplyr::select(D, names(know.ss.pc$center))) 

# Encode "DK" versions of the knowledge Qs.
D <- D %>%
  mutate(
    know.trade.dk = as.numeric(Q34.1=='2'),
    know.trade.3.dk = recode(Q34.1, '1'='Higher', '2'='Lower', '3' = 'No change', '4' = "DK"), 
    know.ss.used.dk = as.numeric(Q35.1=='1'),
    know.ss.used.3.dk = recode(Q35.1, '1'='Higher', '2'='Lower', '3' = 'No change', '4' = "DK"),
    know.ss.grain.dk = as.numeric(Q35.2=='2'),
    know.ss.grain.3.dk = recode(Q35.2, '1'='Higher', '2'='Lower', '3' = 'No change', '4' = "DK"),  
    know.ss.wages.dk = as.numeric(Q34.2 == '2'),
    know.ss.wages.3.dk = recode(Q34.2, '1'='Higher', '2'='Lower', '3' = 'No change', '4' = "DK"),  
  )

# Create subjective numeracy index. All items coded so that higher values correspond to greater subjective numeracy.

D<-D%>%
  mutate(
    num.frac = as.numeric(Q17.2),  
    num.perc = as.numeric(Q17.4),  
    num.tip = as.numeric(Q17.6),  
    num.news = as.numeric(Q17.8),
    num.numb = as.numeric(Q17.10),
    num.weather = 7 - as.numeric(Q17.12),
    num.helpful = as.numeric(Q17.14)
  )

num.pc<-D%>%
  dplyr::select(starts_with("num."))%>%
  drop_na() %>%
  prcomp()  
pc.x<-as.data.frame(num.pc$x)
names(pc.x)<-paste0("num.", names(pc.x))

# assign ResponseId to num.PC.x and join to D
rep_id <- D %>%
  filter(if_all(starts_with("num."), ~ !is.na(.x))) %>%
  dplyr::pull(ResponseId)
pc.x <- pc.x %>% mutate(ResponseId = rep_id) # CE: 2023.05.01. mysteriously, code is breaking here

D <- left_join(D, pc.x, by="ResponseId")

D$num.tot<-rowMeans(dplyr::select(D, names(num.pc$center))) 


# Pretreatment land-use knowledge
D <- D%>%
  mutate(
    know.landreg = as.numeric(Q5.10=='1') 
  )

# Pretreatment blame for high prices & rents
D <- D%>%
  mutate(
    blame.fedstate = as.numeric(str_detect(Q5.11, "1") & !str_detect(Q5.11, "1\\d")),
    blame.local = as.numeric(str_detect(Q5.11, "2")),
    blame.ibank = as.numeric(str_detect(Q5.11, "3")),
    blame.foreign = as.numeric(str_detect(Q5.11, "4")),
    blame.developer = as.numeric(str_detect(Q5.11, "5")),
    blame.landlord = as.numeric(str_detect(Q5.11, "6")),
    blame.homeowner = as.numeric(str_detect(Q5.11, "7")),
    blame.enviro = as.numeric(str_detect(Q5.11, "8")),
    blame.nimby = as.numeric(str_detect(Q5.11, "9")),
    blame.richmover = as.numeric(str_detect(Q5.11, "10")),
    blame.employer = as.numeric(str_detect(Q5.11, "11")),
  )

# engagement with local politics. 
D<-D%>%
  mutate(
    engage.votelocal=as.numeric(Q26.2=='1'),
    engage.candidates=as.numeric(Q26.3),
    engage.petition=as.numeric(grepl("1", Q26.5)),
    engage.nbhdmtg=as.numeric(grepl("2", Q26.5)),
    engage.hearing=as.numeric(grepl("3", Q26.5)),
    engage.contact=as.numeric(grepl("4", Q26.5)),
    engage.count=engage.petition+engage.nbhdmtg+engage.hearing+engage.contact
    )

engage.pc<-D%>%
  dplyr::select(engage.count, engage.votelocal, engage.candidates)%>%
  drop_na() %>%
  prcomp()  
#Assign the principal components to the dataset.
pc.x<-as.data.frame(engage.pc$x)
names(pc.x)<-paste0("engage.", names(pc.x))
rep_id <- D %>%
  filter(if_all(c(engage.count, engage.votelocal, engage.candidates), 
                ~ !is.na(.x))) %>%
  dplyr::pull(ResponseId)
pc.x <- pc.x %>% mutate(ResponseId = rep_id) 
D <- left_join(D, pc.x, by="ResponseId")
D$engage.tot<-rowMeans(dplyr::select(D, c(engage.count, engage.votelocal, engage.candidates)))
  
# partisanship, ideology, national politics
D <- D %>%
  mutate(
    repub = as.numeric(Q26.8), # 1 = Strong, 2 = Not so strong
    dem = as.numeric(recode(Q26.7, '1'='7', '2'='6')),
    ind = as.numeric(recode(Q26.9, '1'='5', '2'='3', '3'='4')),
    pid7 = coalesce(repub, dem, ind)
  ) %>%
  dplyr::select(-repub, -dem, -ind) %>%
  mutate(
    libcon=as.numeric(recode(Q26.1, '1'='-1', '2'='0', '3'='1', '4'=NULL)),
    voted20=as.numeric(Q26.2),
    pid3.nolean=recode(Q26.6, '1'='dem', '2'='rep', '3'='io', '4'='io')
    )

D$pid3.wlean<-D$pid3.nolean
D$pid3.wlean[D$Q26.9=='1']<-'dem'
D$pid3.wlean[D$Q26.9=='2']<-'rep'

## Other demographics: 

# respondent description of own neighborhood
D <- D %>%
  mutate(
    nabe.live=recode(Q26.10,
                     '1'='city_dense', 
                     '2'='city_sparse', 
                     '3'='burb_dense', 
                     '4'='burb_sparse',
                     '5'='small_town',
                     '6'='rural_farm',
                     '7'='rural_notfarm',
                     )
  )


# Predicted Effects of Regional Housing Supply Shock - home value & rent (simple format)
 D <- D %>%
  mutate( 
    shock.rento = 6 - as.numeric(Q11.1),  # variable names used on SS2
    shock.hvo = 6 - as.numeric(Q10.1), 
    )
 
 # Predicted Effects of Regional Housing Supply Shock - home value & rent (simple format) (DK)
 D <- D %>%
  mutate( 
    shock.rento.dk = 6 - as.numeric(Q39.1),  # variable names used on SS2 (DK coded as 0)
    shock.hvo.dk = 6 - as.numeric(Q38.1), 
    )
 
# Add directional coding of rent and hv predictions 
D <- D %>%
  mutate(
    shock.rents = case_when( # s for "simple," ss for "super-simple" format
      shock.rento > 3  ~ 1,
      shock.rento == 3 ~ 0,
      shock.rento < 3 ~ -1),
    shock.hvs = case_when(
      shock.hvo > 3  ~ 1,
      shock.hvo == 3 ~ 0,
      shock.hvo < 3 ~ -1),
  )

# Add directional coding of rent and hv predictions (DK)
D <- D %>%
  mutate(
    shock.rents.dk = case_when( # s for "simple," ss for "super-simple" format
      shock.rento.dk > 3  ~ "Higher",
      shock.rento.dk == 3 ~ "No change",
      shock.rento.dk < 3 & shock.rento.dk > 0 ~ "Lower",
      shock.rento.dk == 0 ~ "DK"),
    shock.hvs.dk = case_when(
      shock.hvo.dk > 3  ~ "Higher",
      shock.hvo.dk == 3 ~ "No change",
      shock.hvo.dk < 3 & shock.hvo.dk > 0 ~ "Lower",
      shock.hvo.dk == 0 ~ "DK"),
  )
 
# Predicted Effects of Regional Shock - home value & rent (supersimple format)
 D <- D %>%
  mutate( 
    shock.rentss = coalesce(Q13.5, Q14.1),
    shock.rentss = as.numeric(recode(shock.rentss, '1' = '1', '2' = '-1', '3' = '0')), 
    shock.hvss = coalesce(Q13.1, Q14.5),
    shock.hvss = as.numeric(recode(shock.hvss, '1' = '1', '2' = '-1', '3' = '0')), 
    )
 
 # Predicted Effects of Regional Shock - home value & rent (supersimple format) (DK)
 D <- D %>%
  mutate( 
    shock.rentss.dk = coalesce(Q41.1, Q40.5),
    shock.rentss.dk = recode(shock.rentss.dk, '1' = 'Higher', '2' = 'Lower', '3' = 'No change', '4' = 'DK'), 
    shock.hvss.dk = coalesce(Q40.1, Q41.5),
    shock.hvss.dk = recode(shock.hvss.dk, '1' = 'Higher', '2' = 'Lower', '3' = 'No change', '4' = 'DK'), 
    )
 
# Coalesce with directional predictions from the simple format 
D <- D %>%
  mutate(
    shock.rentdir = coalesce(shock.rents, shock.rentss),
    shock.hvdir = coalesce(shock.hvs, shock.hvss)
  )

# Coalesce with directional predictions from the simple format (DK)
D <- D %>%
  mutate(
    shock.rentdir.dk = coalesce(shock.rents.dk, shock.rentss.dk),
    shock.hvdir.dk = coalesce(shock.hvs.dk, shock.hvss.dk)
  )

# Directional confidence
 D <- D %>%
  mutate(
    shock.hv.conf = 
      as.numeric(
        coalesce(Q10.2, Q10.3, Q10.4,
               Q13.2, Q13.3, Q13.4,
               Q14.6, Q14.7, Q14.8)
      ),  
    shock.rent.conf = 
      as.numeric(
        coalesce(Q10.2, Q10.3, Q10.4, 
               Q13.6, Q13.7, Q13.8,
               Q14.2, Q14.3, Q14.4)
    )
  )
 
 # Directional confidence (DK)
 D <- D %>%
  mutate(
    shock.hv.conf.dk = 
      as.numeric(
        coalesce(Q38.2, Q38.3, Q38.4,
               Q40.2, Q40.3, Q40.4,
               Q41.6, Q41.7, Q41.8)
      ),  
    shock.rent.conf.dk = 
      as.numeric(
        coalesce(Q39.2, Q39.3, Q39.4, 
               Q40.6, Q40.7, Q40.8,
               Q41.2, Q41.3, Q41.4)
    )
  )
 
# neighborhood housing shock (rents only)
D <- D %>%
  mutate(
    nabe.shock.rent = as.numeric(recode(Q8.1, '1' = '1', '2' = '-1', '3' = '0'))
  )

# neighborhood housing shock (rents only) (DK)
D <- D %>%
  mutate(
    nabe.shock.rent.dk = recode(Q36.1, '1' = 'Higher', '2' = 'Lower', '3' = 'No change', '4' = "DK")
  )

# Policy efficacy

# first move loop number to end of column name
names(D) <- sub("^(\\d+)_(.*)$", "\\2_\\1", names(D))

# break the display order strings into separate columns for each choice option
D <- D %>%
  separate_wider_delim(
    cols = contains("_DO_"),
    delim = "|",
    names_sep = "_c"
  )

# remove "_DO_" from names of looped columns, now split
D <- D %>%
  rename_with(~gsub("_DO", "", .), matches("_DO_\\d+"))

# CN policy efficacy & support
D <- D %>%
  rename_with(~gsub("Q19.1", "efficacy_CN", .), matches("Q19.1")) 
D <- D %>%
  rename_with(~gsub("Q22.1", "support_CN", .), matches("Q22.1")) 
D <- D %>% 
  mutate(
    across(matches("support_CN_\\d{1,2}$"), ~ 6 - as.numeric(.))
  )

# CE policy efficacy & support
D <- D %>%
  rename_with(~gsub("Q23.1", "efficacy_CE", .), matches("Q23.1")) 
D <- D %>%
  rename_with(~gsub("Q24.1", "support_CE", .), matches("Q24.1")) 
D <- D %>% 
  mutate(
    across(matches("support_CE_\\d{1,2}$"), ~ 6 - as.numeric(.))
  )

# assign legible names to the policy preference data

D <- D %>% 
 mutate(
    sup.build.apt.CN = support_CN_1,
    sup.build.sfh.CN = support_CN_2,
    sup.IZ.low.CN = support_CN_3,
    sup.IZ.mid.CN = support_CN_4,
    sup.ban.airbnb.CN = support_CN_5,
    sup.ban.foreign.CN = support_CN_6,
    sup.ban.corp.CN = support_CN_7,
    sup.rent.control.CN = support_CN_8,
    sup.prop.tax.control.CN = support_CN_9,
    sup.pub.housing.CN = support_CN_10,
    sup.vouchers.CN = support_CN_11,
    sup.renter.break.CN = support_CN_12,
    sup.dev.approval.rule.CN = support_CN_13,
    sup.dev.approval.env.CN = support_CN_14,
    sup.dev.approval.fast.CN = support_CN_15,
    sup.fee.removal.CN = support_CN_16,
    sup.ll.vacancy.tax.CN = support_CN_17,
    sup.ho.vacancy.tax.CN = support_CN_18, 
    sup.demo.ban.CN = support_CN_19,
    sup.demo.replace.CN = support_CN_20,
    sup.tax.flip.CN = support_CN_21,
    sup.land.bank.CN = support_CN_22,
    sup.bus.hous.CN = support_CN_23,
    sup.less.env.reg.CN = support_CN_24,
    sup.border.wall.CN = support_CN_25,
    sup.medicare.all.CN = support_CN_26,
    sup.high.min.wage.CN = support_CN_27,
    sup.low.gas.tax.CN = support_CN_28,
    sup.free.loan.CN = support_CN_29, 
    sup.ADU.CN = support_CN_30
 )

D <- D %>% 
 mutate( # encoding of these had errors. corrected 2024.04.010. (Would have been more efficient/less error prone to match by num to ce_names)
   sup.upzone.apt.CE = support_CE_1,
   sup.upzone.sfh.CE = support_CE_2, 
   sup.IZ.low.10.CE = support_CE_3,
   sup.IZ.low.50.CE = support_CE_4,
   sup.IZ.mid.10.CE = support_CE_5,
   sup.IZ.mid.50.CE = support_CE_6,
   sup.ban.airbnb.CE = support_CE_7,
   sup.ban.foreign.CE = support_CE_8,
   sup.ban.corp.CE = support_CE_9,
   sup.rent.control.02.CE = support_CE_10,
   sup.rent.control.vc.CE = support_CE_11,
   sup.prop.tax.control.02.CE = support_CE_12,
   sup.pub.housing.CE = support_CE_13,
   sup.vouchers.CE = support_CE_14,
   sup.renter.break.CE = support_CE_15,
   sup.dev.approval.rule.CE = support_CE_16,
   sup.dev.approval.env.CE = support_CE_17,
   sup.dev.approval.fast.CE = support_CE_18,
   sup.fee.removal.CE = support_CE_19,
   sup.ll.vacancy.tax.CE = support_CE_20,
   sup.ho.vacancy.tax.CE = support_CE_21,
   sup.demo.ban.CE = support_CE_22,
   sup.demo.replace.CE = support_CE_23,
   sup.tax.flip.CE = support_CE_24,
   sup.land.bank.CE = support_CE_25,
   sup.com.link.fee.CE = support_CE_26,
   sup.bus.hous.CE = support_CE_27,
   sup.less.env.reg.CE = support_CE_28,
   sup.border.wall.CE = support_CE_29,
   sup.medicare.all.CE = support_CE_30,
   sup.high.min.wage.CE = support_CE_31,
   sup.low.gas.tax.CE = support_CE_32,
   sup.free.loan.CE = support_CE_33,
   sup.build.sfh.CE = support_CE_34,
   sup.build.apt.CE = support_CE_35,
   sup.ADU.CE = support_CE_36
 )   

# coalesce CN and CE responses where question is identical

# load policies and remove extraneous information
ce_names <- read_csv(here::here("data_raw", "CE_policies.csv")) %>% 
  mutate(policy = gsub("^o ", "",policy),
         policy = str_remove(policy, "\\s*\\(\\d+\\)$"), # remove digits and parens from item
         num=as.character(num)
         ) 
cn_names <- read_csv(here::here("data_raw", "cn_policies.csv")) %>% 
  mutate(policy = gsub("^o ", "",policy),
         policy = str_remove(policy, "\\s*\\(\\d+\\)$"), # remove digits and parens from item
         num=as.character(num)
         ) 

# find the identical policies in each question set 
cn_names$policy[which(cn_names$policy %in% ce_names$policy)]

# coalesce by hand the policies that are phrased identically or essentially identically in the CN and CE battieries (not sure how to code automatic select & coalesce of  "matching" policies); then remove "sup." encoding of the redundant CN and CE obs.
D <- D %>% 
 mutate(
   sup.build.apt = dplyr::coalesce(sup.build.apt.CN, sup.build.apt.CE),
   sup.ban.airbnb = dplyr::coalesce(sup.ban.airbnb.CN, sup.ban.airbnb.CE),
   sup.ban.foreign = dplyr::coalesce(sup.ban.foreign.CN, sup.ban.foreign.CE),
   sup.ban.corp = dplyr::coalesce(sup.ban.corp.CN, sup.ban.corp.CE), 
   sup.pub.housing = dplyr::coalesce(sup.pub.housing.CN, sup.pub.housing.CE),
   sup.vouchers = dplyr::coalesce(sup.vouchers.CN, sup.vouchers.CE),
   sup.renter.break = dplyr::coalesce(sup.renter.break.CN, sup.renter.break.CE), 
   sup.fee.removal = dplyr::coalesce(sup.fee.removal.CN, sup.fee.removal.CE),
   sup.ll.vacancy.tax = dplyr::coalesce(sup.ll.vacancy.tax.CN, sup.ll.vacancy.tax.CE),
   sup.ho.vacancy.tax = dplyr::coalesce(sup.ho.vacancy.tax.CN, sup.ho.vacancy.tax.CE),
   sup.tax.flip = dplyr::coalesce(sup.tax.flip.CN, sup.tax.flip.CE),
   sup.land.bank = dplyr::coalesce(sup.land.bank.CN, sup.land.bank.CE),
   sup.bus.hous = dplyr::coalesce(sup.bus.hous.CN, sup.bus.hous.CE),
   sup.less.env.reg = dplyr::coalesce(sup.less.env.reg.CN, sup.less.env.reg.CE),
   sup.border.wall = dplyr::coalesce(sup.border.wall.CN, sup.border.wall.CE),
   sup.medicare.all = dplyr::coalesce(sup.medicare.all.CN, sup.medicare.all.CE),
   sup.high.min.wage = dplyr::coalesce(sup.high.min.wage.CN, sup.high.min.wage.CE),
   sup.low.gas.tax = dplyr::coalesce(sup.low.gas.tax.CN, sup.low.gas.tax.CE),
   sup.free.loan = dplyr::coalesce(sup.free.loan.CN, sup.free.loan.CE),
   sup.ADU = dplyr::coalesce(sup.ADU.CN, sup.ADU.CE)
 ) %>%
  dplyr::select(-c(
    sup.build.apt.CN, sup.build.apt.CE,
    sup.ban.airbnb.CN, sup.ban.airbnb.CE,
    sup.ban.foreign.CN, sup.ban.foreign.CE,
    sup.ban.corp.CN, sup.ban.corp.CE,
    sup.pub.housing.CN, sup.pub.housing.CE,
    sup.vouchers.CN, sup.vouchers.CE,
    sup.renter.break.CN, sup.renter.break.CE,
    sup.fee.removal.CN, sup.fee.removal.CE,
    sup.ll.vacancy.tax.CN, sup.ll.vacancy.tax.CE,
    sup.ho.vacancy.tax.CN, sup.ho.vacancy.tax.CE,
    sup.tax.flip.CN, sup.tax.flip.CE,
    sup.land.bank.CN, sup.land.bank.CE,
    sup.bus.hous.CN, sup.bus.hous.CE,
    sup.less.env.reg.CN, sup.less.env.reg.CE,
    sup.border.wall.CN, sup.border.wall.CE,
    sup.medicare.all.CN, sup.medicare.all.CE,
    sup.high.min.wage.CN, sup.high.min.wage.CE,
    sup.low.gas.tax.CN, sup.low.gas.tax.CE,
    sup.free.loan.CN, sup.free.loan.CE,
    sup.ADU.CN, sup.ADU.CE)
    )

# create full policy description version of the "support" columns. This is what the respondents' saw. The ordering of the new-named DVs is by descending order of the popularity of the nearest corresponding item on our March 2024 survey, except that non-housing items and items not included on the March 2024 survey come at the end
D <- D %>% 
 mutate(
   `Limit annual property tax increases` = sup.prop.tax.control.CN,
   `Prohibit local governments from raising a homeowner's property taxes by more than 2% per year` = sup.prop.tax.control.02.CE,
   `Adopt laws limiting rent increases` = sup.rent.control.CN,
   `Prohibit landlords from making a new tenant pay higher rents than the previous tenant` = sup.rent.control.vc.CE,
   `Prohibit landlords from raising a tenant's rent by more than 2% per year` = sup.rent.control.02.CE,
   `Give first-time homebuyers a no-interest government loan for their down payment` = sup.free.loan,
   `Prohibit investment banks and corporations from buying homes` = sup.ban.corp,   
   `Require apartment developers to rent some units to middle-income people` = sup.IZ.mid.CN,
   `Require developers of housing projects to sell or rent at least 10% (1 in 10) of the new homes to middle-income people` =
     sup.IZ.mid.10.CE,
   `Require developers of housing projects to sell or rent at least 50% (one half) of the new homes to middle-income people`=
     sup.IZ.mid.50.CE,
   `Require apartment developers to rent some units to low-income people` = sup.IZ.low.CN,
   `Require developers of housing projects to sell or rent at least 10% (1 in 10) of the new homes to low-income people` =  
     sup.IZ.low.10.CE,
   `Require developers of housing projects to sell or rent at least 50% (one half) of the new homes to low-income people` =
     sup.IZ.low.50.CE, 
   `Give renters a tax break` = sup.renter.break,
   `Guarantee approval of housing developments that follow rules` = sup.dev.approval.rule.CN,
   `Make local governments approve housing development projects that comply with objective standards` = sup.dev.approval.rule.CE,
   `Require local governments to promptly approve or reject housing proposals` = sup.dev.approval.fast.CN,
   `Require local governments to approve or deny housing development projects quickly` = sup.dev.approval.fast.CE,
   `End special fees and taxes on housing development` = sup.fee.removal,
   `Increase public spending on low-income housing projects` = sup.pub.housing,
   `Increase public spending on low-income rent vouchers (\"Section 8\")` = sup.vouchers,
   `Have the government buy up property to develop later as affordable housing` = sup.land.bank,
   `Build more single-family homes on open land` = sup.build.sfh.CE,
   `Change zoning to allow more single-family homes on open land` = sup.upzone.sfh.CE,
   `Build more apartments in single-family neighborhoods` = sup.build.apt,
   `Change zoning to allow more apartments in existing neighborhoods` = sup.upzone.apt.CE,
   `Change zoning to allow homeowners to build an \"accessory dwelling unit\" (small house or apartment) in their backyard` = 
     sup.ADU, 
   `Reduce environmental regulations on housing development` = sup.less.env.reg,
   `Prohibit developers of new housing from tearing down old apartment buildings` = sup.demo.ban.CN,
   `Prohibit developers from tearing down old apartment buildings if a tenant lived there recently` = sup.demo.ban.CE,
   `Require developers of new housing who tear down buildings to give prior low-income residents of those buildings a place to live` = sup.demo.replace.CN,
   `Require developers who tear down an apartment building to provide its low-income tenants with other housing at an affordable rent` =  sup.demo.replace.CE,
   `Prevent misuse of environmental lawsuits by opponents of housing proposals` = sup.dev.approval.env.CN,
   `Make people who file frivolous lawsuits against housing development pay for the costs they impose on others` = 
     sup.dev.approval.env.CE, # check this when you get back into Qualtrics
   `Prohibit short-term vacation rentals (AirBnB, VRBO)` = sup.ban.airbnb,
   `Prohibit foreign investors from buying homes` = sup.ban.foreign,
   `Tax landlords if they leave an apartment vacant` = sup.ll.vacancy.tax,
   `Tax homeowners if they leave their homes unoccupied` = sup.ho.vacancy.tax,
   `Tax the profits of \"flippers\"--those who fix up and sell older properties` = sup.tax.flip,
   `Make developers of office and commercial buildings pay for housing that future workers will need` = sup.com.link.fee.CE,
   `Build a border wall to keep illegal immigrants out of the United States` = sup.border.wall,
   `Pass \"Medicare for All\" to give health insurance to every American` = sup.medicare.all,
   `Raise the minimum wage` = sup.high.min.wage,
   `Reduce gas taxes` = sup.low.gas.tax,
 )



# remove efficacy choice `3` columns (no opinion), and recode no-opinion selection
D <- D %>%
  dplyr::select(- matches("^efficacy.*c3$")
         ) %>%
  mutate(
    across(starts_with("efficacy_CN"), 
         ~case_when(. == "31" ~ "no_opinion", 
                    TRUE ~ .)),
    across(starts_with("efficacy_CE"), 
         ~case_when(. == "37" ~ "no_opinion", 
                    TRUE ~ .)),
         ) 

 
# code the retest questions
D <- D %>% 
  mutate(
    re.shock.rentss = coalesce(Q27.1, Q27.2),
    re.shock.rentss = as.numeric(recode(re.shock.rentss, '1' = '1', '2' = '-1', '3' = '0')),
    re.nabe.shock.rent = as.numeric(recode(Q31.1, '1' = '1', '2' = '-1', '3' = '0')),
    re.developers = as.numeric(recode(Q28.1, '1' = '-1', '2' = '1', '3' = '0')), # developers cause higher prices = 1, no opinion = 0, don't cause higher prices as -1. (For retest, pool respondents who predicted higher prices and no change on original rents question.) 
    re.know.trade = as.numeric(Q29.1=='2'),
    re.know.trade.3 = as.numeric(recode(Q29.1, '1'='-1', '2'='1', '3' = '0')), 
    re.know.ss.used = as.numeric(Q30.1=='1'),
    re.know.ss.used.3 = as.numeric(recode(Q30.1, '1'='1', '2'='-1', '3' = '0')),  
    re.know.ss.grain = as.numeric(Q30.2=='2'),
    re.know.ss.grain.3 = as.numeric(recode(Q30.2, '1'='-1', '2'='1', '3' = '0')),  
    re.know.ss.wages = as.numeric(Q29.2 == '2'),
    re.know.ss.wages.3 = as.numeric(recode(Q29.2, '1'='-1', '2'='1', '3' = '0')), 
    re.efficacy_1 = Q20.1 # this is the retest for the first pair (order reversed) for both CN and CE versions of the efficacy Qs. '1' = second item in first pair,  '2' = first item in first pair, '3' = no opinion
  )

# code the retest questions (DK)
D <- D %>% 
  mutate(
    re.shock.rentss.dk = coalesce(Q42.1, Q42.2),
    re.shock.rentss.dk = recode(re.shock.rentss.dk, '1' = 'Higher', '2' = 'Lower', '3' = 'No change', '4' = "DK"),
    re.nabe.shock.rent.dk = recode(Q37.1 , '1' = 'Higher', '2' = 'Lower', '3' = 'No change', '4' = "DK")
  )

# Rescale all scaled variables so that they're mean 0, standard deviation 1
D <- D %>%
  mutate(across(contains(".PC"), scale))

# check signs of first principal components and flip where necessary so that larger values correspond to more of the latent quantity  
 cor(D$know.ss.PC1, D$know.ss.tot, use = "complete.obs") # should be positive (2023.06.23 it's not) 
 cor(D$engage.PC1, D$engage.tot, use = "complete.obs")  # should be positive
 cor(D$num.PC1, D$num.tot, use = "complete.obs") # should be positive (2023.06.23 it's not)
 
 # check correlations of first principal components with each index item, then take absolute value so that larger values correspond to more of the latent quantity

D%>% dplyr::select(know.trade.3, 
                know.ss.used.3,
                know.ss.grain.3,
                know.ss.wages.3,
                know.ss.tot,
                know.ss.PC1) %>%
  corrr::correlate() %>%
  shave(upper = TRUE) %>%
  fashion(decimals = 2, na_print = "—") # cor(D$know.ss.PC1, D$know.ss.tot) is positive

D %>% dplyr::select(engage.votelocal,
                    engage.candidates,
                    engage.petition,
                    engage.nbhdmtg,
                    engage.hearing,
                    engage.contact,
                    engage.count,
                    engage.PC1,
                    engage.tot) %>%
  corrr::correlate() %>%
  shave(upper = TRUE) %>%
  fashion(decimals = 2, na_print = "—") # cor(D$engage.PC1, D$engage.tot) is neg 

D %>% dplyr::select(num.frac,
                    num.perc,
                    num.tip,
                    num.news,
                    num.numb,
                    num.weather,
                    num.helpful,
                    num.tot,
                    num.PC1
                    ) %>%
  corrr::correlate() %>%
  shave(upper = TRUE) %>%
  fashion(decimals = 2, na_print = "—") # cor(D$num.PC1, D$num.tot, use = "complete.obs")  is pos and strong; pos cor among all items in index

D <- D %>% mutate(across(ends_with(".PC1"), ~ abs(.))) # convert PC1s so that they have positive signs. 2023.06.25


# Exclude speeders
D <- D %>% 
  rename(duration_in_seconds = `Duration (in seconds)`)

survey.time <- D %>% 
  filter(Finished == "1") %>%
  mutate(
    seconds = as.numeric(duration_in_seconds),
    minutes = factor(case_when(
      seconds < 5*60 ~ "Less than 5 min",
      seconds >= 5*60 & seconds < 7*60 ~ "5-7 min",
      seconds >= 7*60 & seconds < 9*60 ~ "7-9 min",
      seconds >= 9*60 & seconds < 11*60 ~ "9-11 min",
      seconds >= 11*60 & seconds < 15*60 ~ "11-15 min",
      seconds >= 15*60 & seconds < 20*60 ~ "15-20 min",
      seconds >= 20*60 ~ "More than 20 min"
    ), levels = c("Less than 5 min","5-7 min","7-9 min","9-11 min","11-15 min","15-20 min","More than 20 min"))
  ) %>% 
  group_by(minutes) %>%
  summarize(n = n())

D <- D %>% mutate(duration_in_seconds = as.numeric(duration_in_seconds))

D %>%
  mutate(
    speeders = duration_in_seconds <= median(duration_in_seconds, na.rm = TRUE)/3
    ) %>%
  group_by(speeders) %>%
  summarize(n = n())

# exclude respondents who completed survey in less than 1/3 of median survey time
D <- D %>%
  filter(duration_in_seconds > median(D$duration_in_seconds/3, na.rm = TRUE))

# create new environment for data from pilot survey
pilot <- new.env()
pilot$D <- D

   
```

# Introduction

*This document has the replication code for the paper, \`\`What State Housing Policies Do Voters Want? Evidence from a Platform-Choice Experiment.'' The non-italicized text in this document is the original text from our preanalysis plan. Annotations in the code chunks and/or italiczed text are used to explain any deviations between the code in the preanalysis plan and the code used for the paper.*

*The survey instrument that generated the data for this paper also included some experiments that are being written up as a separate paper. Those portions of the survey came after the portions used in this paper. The description of those portions of the survey, and the corresponding analysis code, are omitted from this replication document.*

*Note that our preanalysis plan defined target subgroups and provided code for all analyses, but not code for figures.*

In previous work on public understanding of housing markets, we have found that most U.S. metro residents do not believe that a large, positive exogenous shock to their region's supply of housing would cause lower home prices or rents [@NalElmOkl22]. We also discovered that views about the price effects of both regional and local housing-supply shocks are not strongly held, as many respondents give inconsistent answers to substantively identical questions about these topics, both within and across surveys.

The present survey expands on our prior work in three directions. First, we seek to understand what kinds of policies respondents believe *would* be effective for increasing the availability and affordability of housing. We compare demand subsidies, price controls, supply-side policies targeting market-rate housing, and supply-side policies targeting subsidized affordable housing. We elicit perceptions of the relative efficacy of the policies, and support for the policies.

Second, we seek to understand whether public support for economically dubious "housing affordability" policies, such as demand subsidies and price controls, matters politically. Adapting the design of @SidTauVav22, we construct a revealed-preference measure of issue importance from a conjoint experiment. Respondents choose between two 3-policy sets, "Set A" and "Set B." The policies in the sets are drawn from a master list of 17 housing and 22 non-housing issues. The former represent our understanding of the major questions in state-level debates about housing and land-use, and the latter are intended to capture many of the salient non-housing policy issues that surface in state elections. In each matchup, the same issues appear in both sets but with contrary positions (e.g., when set A includes "reduce restrictions on abortion," set B includes "increase restrictions on abortion"). Prior to the conjoint experiment, respondents are asked which policy directions they favor on the policies that have been randomly selected for their conjoint profiles. An issue's relative "importance" is the average effect of (1) the inclusion in a choice set of the respondent's position on that issue on (2) the probability that that choice set is selected.

We will report the perceived efficacy, policy-preference, and policy-importance results for the full sample, as well as subgroups defined by tenure (homeowners vs. renters), by subjective desire for future home prices and rents (lower or not-lower), by party identificaiton (Republicans vs. Democrats), and by whether the respondent identifies themselves as a member of a housing "issue public." We categorize respondents as belonging to the housing issue public using both a closed-form question and the free-text method of @RyaEhl23.

# Survey Design

This section of the PAP lays out the structure of the survey and provide the text of the focal survey questions.

```{r}
#| label: define policy set
#| include: false

# This code chunk defines the policy set used in the efficacy, policy preference, and conjoint blocks on the survey. It also encodes the randomization protocol. Chat GPT3.5 was ued to translate the randomization into JavaScript for use in Qualtrics. 

# The `plot_shorthand` and `policy_type` columns were added to the `issues` tibble post-PAP to facilitate plotting of policies with an easy-to-understanding shorthand. (The categories for policy_type, and the assignment of policies to the categories, was made in the PAP.)

issues.rand <- issues <- data.frame(
  code = c(
    "Immigration", 
    "Prescription_Drugs", 
    "Abortion",
    "Education",
    "Climate_Change",
    "Nature",
    "Taxes_Middle",
    "Taxes_Upper",
    "Gun_Control",
    "Marijuana",
    "Minimum_Wage",
    "Elections",
    "Health_Care",
    "Technology",
    "Labor",
    "Arts",
    "Crime",
    "Police",
    "Discrimination_Minority",
    "Discrimination_White",
    "GMOs",
    "Taxes_Gas",
    "Housing_Build_MR_Infill",
    "Housing_Build_MR_Open",
    "Housing_Build_BMR_Infill",
    "Housing_Build_BMR_Open", 
    "Housing_Spend_BMR",
    "Housing_Fees",
    "Housing_Rent_Control",
    "Housing_Property_Taxes",
    "Housing_Permitting",
    "Housing_Section_8",
    "Housing_Renter_Help",
    "Housing_Buyer_Help",
    "Housing_IZ_Middle",
    "Housing_IZ_Lower",
    "Wall_Street",
    "Land_Bank",
    "Parking"),
  plot_shorthand = c( # This is a shorthand for "lib_position," to be used when plotting results. Change these labels only by doing search-and-replace, b/c they're also hard-coded in a couple of other spots in this document. SO: this is what we should modify as needed for geom_text() plots. 
    "More Immigration", 
    "Cap Drug Prices", 
    "Reduce Barriers to Abortion",
    "More Education Spending",
    "Stricter GHG Regs",
    "More Nature Preserves",
    "No Middle-Class Tax Cut",
    "Tax the Rich",
    "More Gun Control",
    "Legalize Marijuana",
    "Higher Minimum Wage",
    "Reduce Barriers to Voting",
    "More Health Spending",
    "Stricter Technology Regs",
    "Easier Unionization",
    "More Arts Spending",
    "Reduce Criminal Sentences",
    "Less Police",
    "More Penalty for Anti-POC Discrim.",
    "Less Penalty for Anti-White Discrim.",
    "Stricter GMO Regs",
    "Higher Gas Taxes",
    "Allow More MR Infill", 
    "Allow More MR Sprawl",
    "Allow More BMR Infill",
    "Allow More BMR Sprawl",
    "Increase BMR Spending",
    "Reduce Dev. Fees",
    "Rent Control",
    "Property-Tax Control",
    "Nondiscretionary Permits",
    "More Housing Vouchers",
    "Renter Tax Break",
    "Down-Payment Subsidy",
    "Middle-Income IZ",
    "Low-Income IZ",
    "Restrict Wall-St. Buyers",
    "Restrict MR for Future BMR",
    "Reduce Parking Minimums"),
  lib_position = c(
    "Allow more immigration",
    "Cap prices that drug companies charge",
    "Reduce restrictions on abortion",
    "Increase government spending on K-12 public schools",
    "Increase regulation of greenhouse-gas emissions",
    "Preserve more land for parks and nature",
    "No tax cuts for the middle class",
    "Increase taxes on the wealthy",
    "Increase gun regulations",
    "Legalize marijuana use",
    "Raise the minimum wage",
    "Reduce barriers to voting",
    "Increase government spending on health insurance for lower-income people",
    "Increase regulation of social media and artificial intelligence",
    "Make it easier for workers to form unions",
    "Increase government spending on the arts",
    "Reduce prison sentences for violent crimes",
    "Reduce the size of police forces",
    "Increase penalties for discrimination against racial minorities",
    "Reduce penalties for discrimination against white people",
    "Increase regulation of Genetically Modified Organisms (GMOs)",
    "Increase gas taxes",
    "Allow more market-rate apartments in existing neighborhoods",
    "Allow more market-rate homes on open land",
    "Allow more subsidized affordable apartments in existing neighborhoods",
    "Allow more subsidized affordable homes on open land",
    "Increase government spending on building affordable housing",
    "Reduce fees and taxes on housing development",
    "Limit how much landlords may increase rents",
    "Limit how much cities may increase property taxes",
    "Make cities approve housing proposals that comply with city rules",
    "Provide more housing vouchers (''Section 8'') to low-income people",
    "Give renters a tax break",
    "Give first-time homebuyers a no-interest government loan for their down payment",
    "Require housing developments to include affordable housing for middle-income people",
    "Require housing developments to include affordable housing for low-income people",
    "Restrict Wall Street investors from buying up homes",
    "Restrict development of market-rate housing on sites that could be developed for subsidized affordable housing in the future",
    "Reduce requirements for off-street parking in new developments"),
  con_position = c(
    "Allow less immigration",
    "Let drug companies charge any price they want",
    "Increase restrictions on abortion",
    "Reduce government spending on K-12 public schools",
    "Decrease regulation of greenhouse-gas emissions",
    "Preserve less land for parks and nature",
    "Cut taxes on the middle class",
    "Reduce taxes on the wealthy",
    "Reduce gun regulations",
    "Criminalize marijuana use",
    "Lower the minimum wage", 
    "Increase safeguards against voter fraud",
    "Reduce government spending on health insurance for lower-income people",
    "Reduce regulation of social media and artificial intelligence",
    "Make it harder for workers to form unions",
    "Reduce government spending on the arts",
    "Increase prison sentences for violent crimes",
    "Hire more police officers",
    "Reduce penalties for discrimination against racial minorities",
    "Increase penalties for discrimination against white people",
    "Reduce regulation of Genetically Modified Organisms (GMOs)",
    "Reduce gas taxes",
    "Restrict building of market-rate apartments in existing neighborhoods",
    "Restrict building of market-rate homes on open land",
    "Restrict building of subsidized affordable apartments in existing neighborhoods",
    "Restrict building of subsidized affordable homes on open land",
    "Reduce government spending on building affordable housing",
    "Increase fees and taxes on housing development",
    "Allow landlords to increase rents as they wish",
    "Allow cities to increase property taxes as they wish",
    "Allow cities to reject any housing proposal that local officials don’t like",
    "Provide fewer low-income ''Section 8'' rent vouchers",
    "No special tax breaks for renters",
    "No special loans for first-time homebuyers",
    "Let developers decide what to charge for new housing",
    "Let developers decide what to charge for new housing",
    "Allow Wall Street investors to buy up homes",
    "Allow development of market-rate housing on any site where housing is normally allowed",
    "Increase requirements for off-street parking in new developments")
)

# add houing policy type (@SO: I added this to the master issues table so as to minimize stray additions later in the code)
issues <- issues %>% 
  mutate(
    policy_type = case_when(
      lib_position %in% c("Allow more market-rate apartments in existing neighborhoods",
                          "Allow more market-rate homes on open land"
                          ) ~ "Supply Side - MR",
      lib_position %in% c("Allow more subsidized affordable apartments in existing neighborhoods",
                          "Allow more subsidized affordable homes on open land",
                          "Increase government spending on building affordable housing"
                          ) ~ "Supply Side - BMR",
      lib_position %in% c("Reduce fees and taxes on housing development",
                          "Make cities approve housing proposals that comply with city rules",
                          "Reduce requirements for off-street parking in new developments"
                          ) ~ "Supply Side - Untargeted",
      lib_position %in% c("Limit how much landlords may increase rents",
                          "Limit how much cities may increase property taxes",
                          "Require housing developments to include affordable housing for middle-income people",
                          "Require housing developments to include affordable housing for low-income people"
                          ) ~ "Price Control",
      lib_position %in% c("Provide more housing vouchers (''Section 8'') to low-income people",
                          "Give renters a tax break",
                          "Give first-time homebuyers a no-interest government loan for their down payment"
                          ) ~ "Demand Subsidy",
      lib_position %in% c("Restrict Wall Street investors from buying up homes",
                          "Restrict development of market-rate housing on sites that could be developed for subsidized affordable housing in the future"
                          ) ~ "Demand Fence",
      code %in% c("Prescription_Drugs", "Education", "Taxes_Middle", "Taxes_Upper", "Technology", "Minimum_Wage", "Health_Care", "Technology", "Labor", "Taxes_Gas"
                  ) ~ "Economic",
      code %in% c("Immigration", "Abortion", "Gun_Control","Marijuana",
            "Elections", "Arts",  "Crime","Police",                  
            "Discrimination_Minority",  "Discrimination_White",    
            "GMOs", "Climate_Change", "Nature"
            ) ~ "Social"# @SO: I moved climate and nature into the "social" category
      )
    )

# Sample indexes for a respondent
indexes <- sample(1:nrow(issues.rand), 5, replace = FALSE)

# Loop to sample 5 issues and record the corresponding code, description, lib_position, and con_position
for(i in 1:5){
  row_index <- indexes[i]
  
# Shuffle lib_position and con_position within the selected row
issues.rand[row_index, c("lib_position", "con_position")] <- 
  sample(issues.rand[row_index, c("lib_position", "con_position")])
  
  # Record the corresponding code, description, lib_position, and con_position
  assign(paste0("i_code_", i), issues.rand$code[row_index])
  assign(paste0("i_description_", i), issues.rand$description[row_index])
  assign(paste0("i_position_A_", i), issues.rand$lib_position[row_index])
  assign(paste0("i_position_B_", i), issues.rand$con_position[row_index])
}


# assign levels to housing policies, for use later in code. This uses plot_shorthand.
policy_levels <- c(
    "Allow More MR Infill",
    "Allow More MR Sprawl",
    "Allow More BMR Infill",
    "Allow More BMR Sprawl", 
    "Increase BMR Spending",
    "Reduce Dev. Fees",
    "Rent Control",
    "Property-Tax Control",
    "Nondiscretionary Permits",
    "More Housing Vouchers",
    "Renter Tax Break",
    "Down-Payment Subsidy",
    "Middle-Income IZ",
    "Low-Income IZ",
    "Restrict Wall-St. Buyers",
    "Restrict MR for Future BMR",
    "Reduce Parking Minimums",
    "More Immigration", 
    "Cap Drug Prices", 
    "Reduce Barriers to Abortion",
    "More Education Spending",
    "Stricter GHG Regs",
    "More Nature Preserves",
    "No Middle-Class Tax Cut",
    "Tax the Rich",
    "More Gun Control",
    "Legalize Marijuana",
    "Higher Minimum Wage",
    "Reduce Barriers to Voting",
    "More Health Spending",
    "Stricter Technology Regs",
    "Easier Unionization",
    "More Arts Spending",
    "Reduce Criminal Sentences",
    "Less Police",
    "More Penalty for Anti-POC Discrim.",
    "Less Penalty for Anti-White Discrim.",
    "Stricter GMO Regs",
    "Higher Gas Taxes"
    )

# sort by policy_level; make levels for policy_type (aid to future plottin)
issues <- issues %>%
  mutate(plot_shorthand = factor(plot_shorthand, levels = policy_levels),
         policy_type = factor(policy_type, levels = c("Supply Side - MR",
                                                      "Supply Side - BMR",
                                                      "Supply Side - Untargeted",
                                                      "Price Control",
                                                      "Demand Subsidy",
                                                      "Demand Fence",
                                                      "Social",
                                                      "Economic"))
         ) %>%
  arrange(plot_shorthand)

```

## Block 1: Demographics and Issue Priorities

The survey begins with a block of demographic questions. Within this block, we also include two questions designed to capture whether the respondent belongs to a housing issue public, that is, people for whom housing-affordability issues are exceptionally politically important. First, we ask a state-specific version of @RyaEhl23's issue-public question: [^1]

[^1]: Respondents who who were asked, in Block 2, about the effect of a shock caused by better homebuilding technology, are in this block asked abut the effect of a shock caused by land-use deregulation. Conversely, respondents who were previously asked about the effect of a shock caused by land-use deregulation are now asked about the effect of a shock caused by better homebuilding technology. Our prior work found that respondents were equally skeptical that either type of shock would reduce home prices and rents.

> Some people have a political issue that they care about more than most other issues. They might think about the issue a lot. They might pay particular attention to news about that issue, even when it's not making national news. They might focus on what political candidates say about that issue, and decide who to vote for on the basis of that issue. Or they might just care about the issue a lot.
>
> **Thinking about problems in \${e://Field/State} today**, is there an issue like that for you?

\noindent Respondents who answer affirmatively are then asked:

> In a short phrase or a sentence or two, what is the \${e://Field/State} issue that you care about?

After the free-text issue-publics question, we give respondents a list of 12 issues and instruct, "Considering just the following issues in \${e://Field/State} today, choose up to three that you care about the most." The issues are "Cost of housing," "Abortion," "Availability of Jobs," "Inflation," "Crime," "Education," "Environment," "Taxes," "Health Care," "Immigration," "Racism," and "Homelessness." The final response choice is, "I don't care about any of these issues." This question is analogous to, but less demanding than, @RyaEhl23's closed-form issue-publics question.[^2] Finally, respondents answer a question about which actors are most responsible for high housing prices and rents in their area, and a question about their preference with respect to future home prices and rents in their city (higher, lower, or the same as today).

[^2]: After administering the survey, we will aggregate responses by state and send them to the Governor and leaders of the state legislature.

## Block 2: Pretreatment Beliefs about Price Effects of Supply Shock

Here, we ask respondent to predict the directional effect of a large, exogenous regional housing supply shock on prices or rents for a typical existing home in their city. Half of the respondents are randomly assigned to a scenario in which the shock is caused by a technology change that improves homebuilder productivity; the other half receive a scenario in which land-use deregulation causes the shock. Both questions are borrowed from @NalElmOkl22.

## Block 3: Perceived Efficacy of Housing Policies

In this block, respondents make pairwise evaluations of the likely efficacy of different policies for "helping people in \${e://Field/State} get housing they can afford."

The block opens with this statement:

> Local governments use zoning laws to control the types of development allowed in different areas (or zones). For example, a city may have a single-family zone allowing only detached houses, a zone for shopping malls or car dealers, and additional zones where apartment buildings are allowed.

> Some states have passed laws to ensure that certain types of housing may be developed. These laws override local zoning.

\noindent Next, we inform respondents of their task:

> State lawmakers who think housing has gotten too expensive are considering many ideas for how to make it more affordable.

> The next questions ask about some of these ideas. You will be shown several pairs of policies. In each pair, select the policy you think would be **more effective for helping people in \${e://Field/State} get housing they can afford.**

> We want to know which policy you think would be more effective, not which one you personally like the best.

\noindent Then we define key terms:

> Two definitions before we proceed:
>
> -   [Market-rate housing]{.underline} is built without financial support from the government and may be sold or rented to anyone at any price.
>
> -   [Subsidized affordable housing]{.underline} is built with government financial support and may be sold or rented only to low-income or moderate-income households, at prices they can afford.

\noindent An attention-check question verifies whether respondents read and remembered the definition of "subsidized affordable housing." Respondents who fail the check receive a restatement of the correct definition but not dropped from the sample.[^3]

[^3]: We suspect that dropping them would bias the sample toward high-cognitive-ability individuals.

After the attention check, respondents receive the pairwise-efficacy questions, presented as follows:

> Question *n* of 5. Which policy would be more effective for helping people in \${e://Field/State} get housing they can afford? \[{Policy A}, {Policy B}, Don't know\]

\noindent Policies A and B are sampled without replacement from the policies shown in Table @tbl-hous-policies. Broadly speaking, the policies can be categorized as follows: (1) supply-side policies targeting market-rate housing, (2) supply-side policies targeting subsidized affordable housing, (3) untargeted supply-side policies, (4) demand subsidies, (5) price controls, and (6) "demand fences" (efforts to keep "bad" types from buying or using property). We chose these policies with an eye to representing the most prominent policy disputes in state and local housing politics. We recognize that some policies could be categorized in more than one way. For example, "restricting development of market-rate housing on sites that could be developed for affordable housing in the future" could be considered not just a demand fence (excluding for-profit develpers from the market for sites), but also a supply-side policy targeting subsidized affordable housing (increasing the availability of sites for such projects). And all policies designed to promote BMR housing can be considered a kind of price control, even though they're not controlling the price of an already existing good or one that market actors seek to produce.

| Policy | Type |
|--------|------|

| Allow more market-rate apartments in existing neighborhoods \| Supply Side - MR \|
| Allow more market-rate homes on open land \| Supply Side - MR \|
| Allow more subsidized affordable apartments in existing neighborhoods \| Supply Side - BMR \|
| Allow more subsidized affordable homes on open land \| Supply Side - BMR \|
| Increase government spending on building affordable housing \| Supply Side - BMR \|
| Reduce fees and taxes on housing development \| Supply Side - Untargeted \|
| Limit how much landlords may increase rents \| Price Control \|
| Limit how much cities may increase property taxes \| Price Control \|
| Make cities approve housing proposals that comply with city rules \| Supply Side - Untargeted \|
| Provide more housing vouchers (''Section 8'') to low-income people \| Demand Subsidy \|
| Give renters a tax break \| Demand Subsidy \|
| Give first-time homebuyers a no-interest government loan for their down payment \| Demand Subsidy \|
| Require housing developments to include affordable housing for middle-income people \| Price Control \|
| Require housing developments to include affordable housing for low-income people \| Price Control \|
| Restrict Wall Street investors from buying up homes \| Demand Fence \|
| Restrict development of market-rate housing on sites that could be developed for subsidized affordable housing in the future \| Demand Fence \|
| Reduce requirements for off-street parking in new developments \| Supply Side - Untargeted \|

: Housing Policies Evaluated for Perceived Efficacy {#tbl-hous-policies}

## Block 4: Policy Preferences

After the pairwise efficacy comparisons, we elicit respondents' preferences with respect to ten housing and non-housing issues. The choice set for each issue consists of a pair of contrasting policy positions, usually representing directional changes from the status quo (e.g., "reduce" vs. "increase"), plus "Don't know." The issues are sampled without replacement from the rows of @tbl-policies, which comprise the 17 housing policies from @tbl-hous-policies plus 22 non-housing policies. The non-housing policies are meant to be a reasonably comprehensive reference set of non-housing issues that often figure into state legislative elections. Most of these items were drawn and adapted from recent surveys by other scholars working on the measurement of issue importance [e.g., @RyaEhl23; @SidTauVav22].

```{r}
#| label: tbl-policies
#| echo: false 
#| warning: false

pol_tab_df <- issues %>% 
  dplyr::select(code, lib_position, con_position) %>%
  dplyr::rename(`Issue Code` = code,
                Position_1 = lib_position,
                Position_2 = con_position)

pol_tab <- knitr::kable(pol_tab_df, 
             format = "markdown")

 pol_tab %>% 
   kableExtra::column_spec(column = c(1,2,3), 
                           width = c("4.5cm", "5.5cm", "5.5cm")
             )  %>%
   kableExtra::kable_styling(font_size = 10) %>%
   kable_styling(latex_options = "striped")

```

```{r}
#| label: tbl_1 
#| warning: false

# This makes table of housing policies for the paper. 
issues %>% 
  dplyr::select(lib_position, 
                con_position, 
                policy_type, 
                plot_shorthand) %>%
  mutate( # adding Clayton's preferred usage
    policy_type = case_when( 
      policy_type == "Supply Side - MR" ~ "Supply Side (Market Rate)",
      policy_type == "Supply Side - BMR" ~ "Supply Side (Below Market Rate)",
      policy_type == "Supply Side - Untargeted" ~ "Supply Side (Untargeted)",
      TRUE ~ policy_type)
    ) %>%
  dplyr::relocate(policy_type, lib_position, con_position) %>%
  rename(`Position 1` = lib_position,
         `Position 2` = con_position,
         `Policy Type` = policy_type,
         `Plot Shorthand` = plot_shorthand) %>%
  filter(!`Policy Type` %in% c("Economic", "Social")) %>% 
  gt() %>%
  tab_header(title = "Housing Policy Positions on Survey") %>%
  tab_source_note(source_note = "Randomly chosen pairs of items from the column, ''Position 1,'' are presented to respondents in the perceived-efficacy section of our survey. Respondents state their preference as between ''Position 1'' and ''Position 2'' (within a row) in the policy-support section of the survey. ''Policy Type'' and ''Plot Shorthand'' are used to represent policies in the figures depicting our results.") %>%
  opt_row_striping() %>%
  gtsave(path=here::here("figures","PIPE_paper"), 
         file="tab_1A_hous.tex")

# This makes table of nonhousing policies for the paper. 
issues %>% 
  dplyr::select(lib_position, 
                con_position, 
                policy_type, 
                plot_shorthand) %>%
  dplyr::relocate(policy_type, lib_position, con_position) %>%
  rename(`Position 1` = lib_position,
         `Position 2` = con_position,
         `Policy Type` = policy_type,
         `Plot Shorthand` = plot_shorthand) %>%
  filter(`Policy Type` %in% c("Economic", "Social")) %>% 
  gt() %>%
  tab_header(title = "Non-housing Policy Positions on Survey") %>%
  tab_source_note(source_note = "Respondents state their preference as between ''Position 1'' and ''Position 2'' (within a row) in the policy-support section of the survey. ''Policy Type'' and ''Plot Shorthand'' are used to represent policies in the figures depicting our results.") %>%
  opt_row_striping() %>%
  gtsave(path=here::here("figures","PIPE_paper"), 
         file="tab_1A_non_hous.tex")

```

## Block 5: Policy Priorities Conjoint

After answering the 10 policy questions, respondents perform an exercise that's meant to reveal whether housing policy positions are, individually or as a class, electorally important in state politics relative to the set of prominent non-housing issues in @tbl-policies. Our design largely follows @SidTauVav22, who, after asking respondents for their positions on numerous nationally prominent issues, presented conjoint profiles consisting of 2-4 of the same issues. (See also @RyaEhl23 and @HanLauViv20.) The relative importance of issues can then be quantified as the probability that a respondent chooses a profile that includes his or her position on the issue.

Each respondent evaluates 5 profiles. Figure @fig-conjoint-example illustrates the conjoint task.

![Conjoint-Task Example](Conjoint_Example.png){#fig-conjoint-example}

## Block 6: Retests

Following [@ClaHorKau23]'s recommendations for estimating measurement error, we retest respondents on their first pairwise-efficacy question, their first policy-preference question, and their first conjoint question, with the order of the choices reversed. We will report error rates in the supplemental information. We aren't pre-specifying any adjustments to account for measurement error because we're not sure how best (if at all) to adjust for it, given the principal estimands.

## Closing Block: Final Demographics

The survey closes with a short block of supplemental demographic questions, which elicit neighborhood type, ideology, partisanship, household income, and housing costs.

# Estimands and Analytical Strategy

```{r necessary functions}
#| echo: false 
## These functions are used to analyze pilot survey data included in an appendix to the paper. They are parked here for convenience, but not used until the Appendix section.
## Set echo to false to make PAP narrative more
## Compare columns for the simulated data
## This will ingest D_efficacy which is created in a chunk below. 

sim_compare_columns <- function(df) {
  # Initialize empty columns for winners, losers and draw
  df$winner <- NA_character_
  df$loser <- NA_character_
  df$draw <- NA
  temp_wins <- NULL

    # Compute winner, loser and draw for this suffix
    temp_df <- df %>% dplyr::select(c(policy_first, policy_second, score_first,winner, loser, draw, ResponseId)) %>%       ###This matches the value in score_first, which denotes which policy the respondent chose to the winning policy between 1 and 2. score_first==1 when the first policy won, 0 when it lost, and .5 when there was a draw.
      ##For draws, policy 1 goes in the winner column and 2 in the loser column. This doesn't matter, since both losses and draws go into the denominator. 
      dplyr::mutate(
        winner = case_when(score_first == 1 ~ policy_first, 
                           score_first == 0 ~ policy_second,
                           score_first == .5 ~ policy_first),
        loser = case_when(score_first == 0 ~ policy_first, 
                           score_first == 1 ~ policy_second,
                           score_first == .5 ~ policy_second),
        draw = if_else(is.na(draw), if_else(score_first == .5, TRUE, FALSE), NA))
  ##This column is the winner and loser of each pairwise efficacy section, by respondent.    
    temp_wins <- temp_df %>% dplyr::select(winner,loser, draw, ResponseId)
  
  return(temp_wins)
}

###These are the policy categorizations. We need them to compare by policy group. 

issue_topics <- data.frame(
  Policy = c(
    "Allow more market-rate apartments in existing neighborhoods",
    "Allow more market-rate homes on open land",
    "Allow more subsidized affordable apartments in existing neighborhoods",
    "Allow more subsidized affordable homes on open land",
    "Increase government spending on building affordable housing",
    "Reduce fees and taxes on housing development",
    "Limit how much landlords may increase rents",
    "Limit how much cities may increase property taxes",
    "Make cities approve housing proposals that comply with city rules",
    "Provide more housing vouchers (''Section 8'') to low-income people",
    "Give renters a tax break",
    "Give first-time homebuyers a no-interest government loan for their down payment",
    "Require housing developments to include affordable housing for middle-income people",
    "Require housing developments to include affordable housing for low-income people",
    "Restrict Wall Street investors from buying up homes",
    "Restrict development of market-rate housing on sites that could be developed for subsidized affordable housing in the future",
    "Reduce requirements for off-street parking in new developments"
  ),
  Type = c(
    "Supply Side - MR",
    "Supply Side - MR",
    "Supply Side - BMR",
    "Supply Side - BMR",
    "Supply Side - BMR",
    "Supply Side - Untargeted",
    "Price Control",
    "Price Control",
    "Supply Side - Untargeted",
    "Demand Subsidy",
    "Demand Subsidy",
    "Demand Subsidy",
    "Price Control",
    "Price Control",
    "Demand Fence ",
    "Price Control",
    "Supply Side - Untargeted"
  )
)


####This function puts the sim data efficacy comparisons in a format that one can calculate win percentages clustering on the respondent. 

sim_win_rate_cluster<- function(x){
  ##These are the winners and a "dv" column for a future lm_robust call.
  temp_1 <- data.frame("policy"= x$winner, "draw"=x$draw, "id"=x$ResponseId) %>% mutate(y=case_when(draw==F ~ 1,
                                    draw==T ~ .5))                # Same thing except losers             
  temp_2 <- data.frame("policy"= x$loser, "draw"=x$draw, "id"=x$ResponseId) %>% 
    mutate(y=case_when(draw==F ~ 0, 
                       draw==T ~ .5))
  ##Bind both together and then join the issue topic area. 
  temp_df <- bind_rows(temp_1, temp_2) %>% left_join(issue_topics, by=c("policy"="Policy"))
    return(temp_df)
}
  

###Compare columns for the SS3 (pilot) data
#type takes value of CE or CN

compare_columns <- function(df,type) {
  # Initialize empty columns for winners, losers and draw
  df$winner <- NA_character_
  df$loser <- NA_character_
  df$draw <- NA
  temp_wins <- NULL

  # Iterate through suffixes 1 to 9
  for (suffix in 1:9) {
    # Select columns to compare
    eff_col <- paste0("efficacy", "_", type,"_", suffix)
    c1_col <- paste0("efficacy", "_", type,"_", suffix, "_c1")
    c2_col <- paste0("efficacy", "_", type,"_", suffix, "_c2")
    
    # Compute winner, loser and draw for this suffix
    temp_df <- df %>% dplyr::select(c(eff_col, c1_col, c2_col, winner, loser, draw, ResponseId)) %>% 
      ###This part of the function matches what was in eff_col to c1 and c2 to find the winner. In the case of a draw, I make c1 the winner and c2 the loser. This doesn't matter because the change in rating is computed as a draw for both. 
      dplyr::mutate(
        winner = case_when(!!sym(eff_col) == !!sym(c1_col) ~ !!sym(c1_col), 
                           !!sym(eff_col) == !!sym(c2_col) ~ !!sym(c2_col),
                           !!sym(eff_col) == "no_opinion" ~ !!sym(c1_col)),
        loser = case_when(!!sym(eff_col) == !!sym(c1_col) ~ !!sym(c2_col), 
                           !!sym(eff_col) == !!sym(c2_col) ~ !!sym(c1_col),
                          !!sym(eff_col) == "no_opinion" ~ !!sym(c2_col)),
        draw = if_else(is.na(draw), if_else(!!sym(eff_col) == "no_opinion", TRUE, FALSE), NA))
        
    ##This is just leftover code from the first version of the function. I'm keeping it here in case it's useful later.
      # if_else(is.na(winner), if_else(!!sym(eff_col) == !!sym(c1_col), !!sym(c1_col),
          #                                        if_else(!!sym(eff_col) == !!sym(c2_col), !!sym(c2_col), NA), NA), NA),
        # loser = if_else(is.na(loser), if_else(!!sym(eff_col) == !!sym(c1_col), !!sym(c2_col),
        #                                         if_else(!!sym(eff_col) == !!sym(c2_col), !!sym(c1_col), NA), NA), NA),
        #draw = if_else(is.na(draw), if_else(!!sym(eff_col) == "no_opinion", TRUE, FALSE), NA)
      
    temp_wins <- bind_rows(temp_wins, temp_df %>% dplyr::select(winner,loser, draw, ResponseId))
  }
    
  return(temp_wins)
}


###Load policy names for pilot data

ce_names <- read_csv(here::here("data_raw", "CE_policies.csv")) %>% 
  mutate(policy = gsub("^o ", "",policy),
         num=as.character(num)) 

cn_names <- read_csv(here::here("data_raw", "cn_policies.csv")) %>% 
  mutate(policy = gsub("^o ", "",policy),
         num=as.character(num)) 



###Create a new function to calculate win percentage that accounts for clustering


##I don't know if Chris' method of assigning .5 to a draw is correct. I get the same point estimates as the original function when I treat draws as NA. The point estimates drop when draws are .5. The ordering remains the same. 

##For now, I will keep it .5 so it's consistent with Chris, but will discuss. 

##Ingest output of compare columns
win_rate_cluster<- function(x, type){
  temp_1 <- data.frame("policy"= x$winner, "draw"=x$draw, "id"=x$ResponseId) %>% mutate(y=case_when(draw==F ~ 1,
                                    draw==T ~ .5))                                                                
  temp_2 <- data.frame("policy"= x$loser, "draw"=x$draw, "id"=x$ResponseId) %>% 
    mutate(y=case_when(draw==F ~ 0, 
                       draw==T ~ .5))
  
  if(type=="CE"){
    temp_df <- bind_rows(temp_1, temp_2) %>% left_join(ce_names, by=c("policy"="num"))
    return(temp_df)
  }
  else{
    temp_df <- bind_rows(temp_1, temp_2) %>% left_join(cn_names, by=c("policy"="num"))
    return(temp_df)
  }
}

```

## Baseline Beliefs

### The Efficacy of Housing Policies {#sec-strat-efficacy}

We quantify beliefs about the relative efficacy of housing policies for "for helping people in \${e://Field/State} get housing they can afford" by estimating the win-rate of each policy in the randomized matchup.

We will also provide heat-map plots showing the rate at which policies grouped into the classifications shown in @tbl-hous-policies "beat" policies in the other groupings.

In the Supplemental Information, we provide a further robustness check by fitting a Bradley-Terry model to the pairwise comparisons data. (See @LauBlu23 for a discussion of the Bradley-Terry model and a related application to perceptions of issue importance.) In principle, the Bradly-Terry model, which accounts for the "strength" of the comparator policy, should yield more precise estimates of the policies' ranks, but the win-rate estimates have advantage of being easier to interpret. Below, in @sec-analysis-efficacy-power, we report results from a simulation-based power analysis which finds no material increase in power from using the Bradley-Terry model.

We will provide subgroup results by tenure (homeowners vs. renters), stated desire for future home prices and rents (higher vs. not-higher), partisanship (Democrats vs. Republicans), and issue publics (splitting respondents by whether they do or do not identify as members of a housing issue public). For purposes of the issue-publics analysis, we define respondents as belonging to the housing issue public if they selected "Cost of housing" as one of the top-three problems in their state.[^4]

[^4]: We will also provide an off-plan analysis using the free-text importance question to classify respondents as belonging to a housing issue public or not, but because classification using that question will necessarily be more subjective, we don't pre-specify the classification.

Based on the results of a pilot survey reported in the Appendix, we expect to find that demand subsidies and price controls are regarded as more effective than supply-side measures, particularly supply-side measures that target market-rate housing.

### Housing Policy Preferences {#sec-strat-preferences}

We will estimate the share of respondents (in the full sample, and in each target subgroup) who support each of the 17 housing policies. We will use plots to visualize the aggregate relationship between support and perceived efficacy. (See the Appendix for illustrative examples from the pilot.) Finally, we will provide correlation matrices to show how support for polices of different types hangs together.

We expect to find that demand subsidies and price controls draw higher levels of support than supply-side measures, particularly supply-side measures that target market-rate housing.

### Are Housing Policies Prioritized? {#sec-strat-priorities}

We hypothesize that a large share of the mass public will list "housing costs" as a priority issue, but that housing policy stances will prove less important---even among the self-categorized housing issue public---per the conjoint choice tasks relative to traditional social and economic policy issues. We base this conjecture on the fact that housing policy has not been subsumed by national partisan politics, which may result in most people being less sure of their preferences.

If this conjecture is borne out, it suggests that state legislators have a lot of slack to choose the housing policies they think best, though there may be some margins over which downstream policy impacts would risk a backlash.

# Import and Recode Survey Data

This section encodes the main variables of interest.

```{r}
#| label: import Qualtrics data
#| include: false

# Qualtrics export settings: numeric, show randomization order

filename <- "NEO - Housing Policy Efficacy Survey (2024)_March 13, 2024_14.39.csv"

col_names <- names(read_csv(here::here("data_raw", filename), n_max = 0))

D <- read_csv(here::here("data_raw", filename),  
                  col_names = col_names, 
                  col_types = cols(.default = col_character()),
                  skip=3)

# add filter when using the real data instead:
# D <- D %>% filter(!is.na(PID) & PID != "testPID") # drop fake & bench-test data (pre-launch), i.e., responses w/o a Forthright identifier

```

```{r}
#| label: recode Qualtrics survey data

#Toggle testing mode. Set to F for analysis of real data rather than bench-tester data.
testing <- F

# drop respondents who did not finish or got bumped at attention check stage.  
D <- D %>%
  filter(Finished == '1') %>%
  filter(Q2.1 == 1 & Q2.2 == "1,2")

# print respondent IDs for Rick
# 
# RIDs <-D %>% dplyr::select(RESPONDENT_ID)
# write_csv(RIDs, here::here("data_transformed", "soft_launch_RIDs.csv"))

if(testing==T){
  target.N <- 5000 # target sample size 
  D <- replicate(floor(target.N/nrow(D)), D, simplify = FALSE) %>% bind_rows() # create N approx equal to target.N simulated sample. replicate rather than resample to avoid ending up with test data that has no observations on some items.
  D$ResponseId <- paste0("R", 1:nrow(D))   # new unique identifier (Qualtrics ID is replicated)
}

# drop embedded data fields used only in question text, and other vestigal and unnecessary stuff
D <- D %>%
  dplyr::select(- (RecipientLastName:ExternalReference), # Qualtrics unused field
         - UserLanguage, # Qualtrics unused field 
         - Q5.6, # used if packrat == TRUE 
         - Q5.7, # used if packrat == TRUE
         - Q5.8, # used if packrat == TRUE 
         - (`Q30.2`:`Q30.5`), # used if packrat == TRUE (local politics Qs)
         )
         
# Intro demographics section of survey
D<-D%>%
  mutate(
    age.cat=recode(Q5.1,'1'='18-29', '2'='30-44', 
                        '3'='45-64', '4'='65 plus'),
    male=as.numeric(Q5.2=='1')
  )

D$race.eth<-NA
D$race.eth[D$Q5.3=="1"]<-"White"
D$race.eth[D$Q5.3=="2"]<-"Black"
D$race.eth[D$Q5.3=="4"]<-"Asian"
## Order is important here; any Hispanic supersedes race.
D$race.eth[grep("3", D$Q5.3)]<-"Hispanic"
D$race.eth[D$Q5.3%in%c("5", "6")]<-"Multi/Other"
D$race.eth[is.na(D$race.eth)]<-"Multi/Other"

D <- D %>%
  mutate(
    has.ba = as.numeric(Q5.4=='4'),
    tenure = Q5.5, # 1=own, 2=rent, 3=dependent, 4=other.  
    tenure = recode(tenure, 
                     '1' = 'Owner', 
                     '2' = 'Renter', 
                     '3' = 'Other',
                     '4' = 'Other',
                    ),
    ownhome = as.numeric(tenure == "Owner"), # encoding on SS2
    want.price = recode(Q5.15, '1'='Higher', '2'='Same', '3'='Lower'),
    income = as.numeric(Q30.11), # not recoded, 11 point scale, `1` < $30,000, `11` > $150,000
    housing.costs = as.numeric(Q30.12), # not recoded, 14 point scale, `1` < $250/month, `14` > $20k/month
    cost.burden = housing.costs/income # very coarse measure of cost burden, dividing income on 11 point scale by (15 - housing expense on a 14 point scale))
)  

# partisanship, ideology, national politics
D <- D %>%
  mutate(
    repub = as.numeric(Q30.8), # 1 = Strong, 2 = Not so strong
    dem = as.numeric(recode(Q30.7, '1'='7', '2'='6')),
    ind = as.numeric(recode(Q30.9, '1'='5', '2'='3', '3'='4')),
    pid7 = coalesce(repub, dem, ind)
  ) %>%
  dplyr::select(-repub, -dem, -ind) %>%
  mutate(
    libcon=as.numeric(recode(Q30.1, '1'='-1', '2'='0', '3'='1', '4'=NULL)),
    # voted20=as.numeric(Q30.2),
    pid3.nolean=recode(Q30.6, '1'='dem', '2'='rep', '3'='io', '4'='io')
    )

D$pid3.wlean<-D$pid3.nolean
D$pid3.wlean[D$Q30.9=='1']<-'dem'
D$pid3.wlean[D$Q30.9=='2']<-'rep'


# respondent description of own neighborhood
D <- D %>%
  mutate(
    nabe.live=recode(Q30.10,
                     '1'='city_dense', 
                     '2'='city_sparse', 
                     '3'='burb_dense', 
                     '4'='burb_sparse',
                     '5'='small_town',
                     '6'='rural'
                     )
  )

# Typical housing type in respondent's town or city
D <- D %>%
  mutate(
    hous.typ.rent = recode(Q6.1, '1'='Small_Apt', '2'='Large_Apt', '3'='Small_Townhome', '4'='Large_Townhome', '5'='Small_SFH', '6' = "Large_SFH"),
    hous.typ.own = recode(Q6.2, '1'='Small_Condo', '2'='Large_Condo', '3'='Small_Townhome', '4'='Large_Townhome', '5'='Small_SFH', '6' = "Large_SFH")
  )

# Pretreatment blame for high prices & rents
D <- D %>%
  mutate(
    blame.fedstate = as.numeric(str_detect(Q5.16, "1") & !str_detect(Q5.16, "1\\d")),
    blame.local = as.numeric(str_detect(Q5.16, "2") & !str_detect(Q5.16, "2\\d")),
    blame.ibank = as.numeric(str_detect(Q5.16, "3")),
    blame.foreign = as.numeric(str_detect(Q5.16, "4")),
    blame.fed = as.numeric(str_detect(Q5.16, "5")),
    blame.developer = as.numeric(str_detect(Q5.16, "6")),
    blame.landlord = as.numeric(str_detect(Q5.16, "7")),
    blame.homeowner = as.numeric(str_detect(Q5.16, "8")),
    blame.enviro = as.numeric(str_detect(Q5.16, "9")),
    blame.nimby = as.numeric(str_detect(Q5.16, "10")),
    blame.richmover = as.numeric(str_detect(Q5.16, "11")),
    blame.employer = as.numeric(str_detect(Q5.16, "12")),
  )

# Pretreatment issue importance  
D <- D %>%
  mutate(
    imp.open = as.numeric(Q5.9 == "1"),
    imp.open.text = Q5.10,
    imp.top3.housing = as.numeric(str_detect(Q5.12, "1") & !str_detect(Q5.12, "1\\d")),
    imp.top3.abortion = as.numeric(str_detect(Q5.12, "2") & !str_detect(Q5.12, "2\\d")),
    imp.top3.jobs = as.numeric(str_detect(Q5.12, "3") & !str_detect(Q5.12, "3\\d")),
    imp.top3.inflation = as.numeric(str_detect(Q5.12, "4")),
    imp.top3.crime = as.numeric(str_detect(Q5.12, "5")),
    imp.top3.education = as.numeric(str_detect(Q5.12, "6")),
    imp.top3.environment = as.numeric(str_detect(Q5.12, "7")),
    imp.top3.taxes = as.numeric(str_detect(Q5.12, "8")),
    imp.top3.healthcare = as.numeric(str_detect(Q5.12, "9")),
    imp.top3.immigration = as.numeric(str_detect(Q5.12, "10")),
    imp.top3.racism = as.numeric(str_detect(Q5.12, "11")),
    imp.top3.homeless = as.numeric(str_detect(Q5.12, "12")),
    imp.top3.none = as.numeric(str_detect(Q5.12, "13"))
  )


# Pre- & post-treatment housing supply-shock beliefs. Higher values correspond to predictions of positive effect on price, i.e., greater "supply skepticism."
D <- D %>%
  mutate(
    shock.pre = coalesce(Q7.1, Q7.2),  
    shock.pre = 6 - as.numeric(shock.pre), 
    shock.pre.dir = recode(shock.pre, '5' = 'Higher', '4' = 'Higher', '3' = 'No change', '2' = 'Lower', '1' = 'Lower'), 
    shock.post = coalesce(Q23.1, Q23.2), 
    shock.post = 6 - as.numeric(shock.post),  
    shock.post.dir = recode(shock.post, '5' = 'Higher', '4' = 'Higher', '3' = 'No change', '2' = 'Lower', '1' = 'Lower')
    )

# Pass attention check on definition of subsidized affordable housing?
D <- D %>%
  mutate(
    SAH.attn.pass = as.numeric(Q10.1 == "1")
  )

# Pass attention check on treatment message?
D <- D %>%
  mutate(
    treat.attn.pass = as.numeric(
      coalesce(Q16.1 == "1",
               Q18.1 == "1",
               Q20.1 == "1",
               Q22.1 == "1")
      ))
 
# Post-treatment DV2 (relative efficacy of intense policies)
D <- D %>%
  mutate(
    MR.more.effective = as.numeric(Q24.1 == "1"),
    MR.refcat = stringr::str_extract(Q24.1_DO, "\\d"),
    MR.refcat = case_match(MR.refcat, '2' ~ "BMR_spend", '3' ~ "rent_control", '4' ~ "free_loans")
  )

# Post-treatment DV3 (support for building more market-rate housing)
D <- D %>%
  mutate(
    MR.support.TOD = 6 - as.numeric(Q25.1),
    MR.support.sprawl = 6 - as.numeric(Q26.1)
  )

# Post-treatment DV4 (send a message to policymakers)
D <- D %>%
  mutate(
    sent.message = as.numeric(!is.na(coalesce(Q27.2, Q27.3))),
    sent.message.text = coalesce(Q27.2, Q27.3),
    sent.message.words = stringr::str_count(sent.message.text, ' ') + 1,
    sent.message.words =replace_na(sent.message.words, 0) # response encoded as zero if respondent declined invite and thus wrote no words
  )

# Look for speeders and exclude those who completed survey in less than 1/3 of the median time.
D <- D %>% 
  rename(duration_in_seconds = `Duration (in seconds)`)

survey.time <- D %>% 
  filter(Finished == "1") %>%
  mutate(
    seconds = as.numeric(duration_in_seconds),
    minutes = factor(case_when(
      seconds < 5*60 ~ "Less than 5 min",
      seconds >= 5*60 & seconds < 7*60 ~ "5-7 min",
      seconds >= 7*60 & seconds < 9*60 ~ "7-9 min",
      seconds >= 9*60 & seconds < 11*60 ~ "9-11 min",
      seconds >= 11*60 & seconds < 15*60 ~ "11-15 min",
      seconds >= 15*60 & seconds < 20*60 ~ "15-20 min",
      seconds >= 20*60 ~ "More than 20 min"
    ), levels = c("Less than 5 min","5-7 min","7-9 min","9-11 min","11-15 min","15-20 min","More than 20 min"))
  ) %>% 
  group_by(minutes) %>%
  summarize(n = n())

D <- D %>% mutate(duration_in_seconds = as.numeric(duration_in_seconds))

D %>%
  mutate(
    speeders = duration_in_seconds <= median(duration_in_seconds, na.rm = TRUE)/3
    ) %>%
  group_by(speeders) %>%
  summarize(n = n())

# D %>%
#   mutate(
#     speeders = duration_in_seconds <= median(duration_in_seconds, na.rm = TRUE)/3
#     ) %>%
#   filter(speeders == TRUE) %>%
#   dplyr::select(RESPONDENT_ID) %>%
#   write_csv(here::here("data_transformed", "speeder_RIDs.csv"))

# exclude respondents who completed survey in less than 1/3 of median survey time
 D <- D %>%
   filter(duration_in_seconds > median(D$duration_in_seconds/3, na.rm = TRUE)) 
 
  D %>%
   group_by(tenure) %>% summarize(n = n())
 
if(testing == TRUE){D$treat[D$treat == "NULL"] <- "none"} # to avoid confusion with R reserved word, NULL. (Survey has been updated to avoid the issue, but it's present in the bench tester data)
   
```

```{r}
#| label: free_text_issue_public_classify   
# offplan
# This chunk converts answers to the free-text question about the most important issue in the respondent's state into a classifier that corresponds to the 12 categories in the closed-form most-important-issue question. It integrates two "bag of words" approaches, based on their relative performance vis-a-vis human coders. See Other Exploratory Results, below   

# Make tibble of categories and keywords for each. Categories here correspond to the closed-form issue public question, to which respondents are encoded as imp.top3.x, where x is the issue
issue_pub_categories <- tibble::tribble(
  ~ cat, ~ strings,
  "imp.open.housing", "hous|rent|homes|property tax",  
  "imp.open.homeless", "homeless|unhoused",
  "imp.open.inflation", "inflation|cost of|rising cost|rising price|prices",
  "imp.open.immigration", "immigra|border|migrant",
  "imp.open.crime", "crime|criminal|violent|violence|illegal drug|gun control|disorder",
  "imp.open.abortion", "abortion|reproductive",
  "imp.open.jobs", "job|wage",
  "imp.open.education", "educat|school|college",
  "imp.open.environment", "enviro|climate|nature|warming",
  "imp.open.taxes", "tax",
  "imp.open.healthcare", "health|prescription drug",
  "imp.open.racism", "race|racial|discrim|black|woke"
)

# function to create imp.open indicators
create_imp.open.i <- function(data, categories) {
  for (i in 1:nrow(categories)) {
    category <- categories$cat[i]
    indicator <- str_detect(
      string = str_to_lower(data$imp.open.text), 
      pattern = categories$strings[i])
    data[[paste0(category)]] <- indicator
  }
  return(data)
}

# make the indicators
D <- create_imp.open.i(data = D, categories = issue_pub_categories)

# create an indicator for respondents who list either housing or homelessness as their top issue
D$imp.open.housing.or.homeless <- D$imp.open.housing | D$imp.open.homeless 

# assign FALSE to the imp.open issue public fields if respondent said they don't have a priority issue
D <- D %>%
  mutate(
    across(imp.open.housing:imp.open.housing.or.homeless, 
           ~ case_when(is.na(.) & !is.na(Q5.9) ~ FALSE,
                       TRUE ~ .)
  )) 

D %>% summarize(across(
  imp.open.housing:imp.open.housing.or.homeless, 
  \(x) round(mean(x, na.rm = TRUE), digits = 2)))

# check pairwise correlation with closed-form importance questions:
compute_correlations <- function(data, string) {
  # Define pattern based on the input string
  pattern <- paste0("\\.", string)
  subset_data <- data %>% dplyr::select(matches(pattern))
  cor(subset_data, use = "pairwise.complete.obs")
}

namelist <- c("housing", "homeless", "inflation", "immigration",
              "crime", "abortion", "jobs",  "education",
              "environment", "taxes", "healthcare", "racism")

imp.cor.list <- map(namelist, ~ compute_correlations(filter(D, !is.na(Q5.9)), .x)) # excluding from D the respondents who said they had no MII

names(imp.cor.list) <- namelist

map(imp.cor.list, ~ .x[2,1])
```

## Perceived Efficacy of Housing Policies

### Code for Paper

#### Data Preparation

First, we make long dataframe, with one matchup per row, for fitting the pairwise efficacy models. The long dataframe is structured in the manner requrred by the [BradleyTerry2](https://cran.r-project.org/web/packages/BradleyTerry2/vignettes/BradleyTerry.pdf) package; the same structure is easily adapted for the winrate analysis too.

```{r}
#| label: long-df-efficacy-models 
#| output: false 

# The BradleyTerry2 package requires data in "long" format, with one matchup per row. See Secton 7.3 of https://cran.r-project.org/web/packages/BradleyTerry2/vignettes/BradleyTerry.pdf. 

  # Col 1: name of first policy in matchup
  # Col 2: name of second policy in matchup
  # Col 3: 1 if first policy won matchup, 0 if it lost, 0.5 for ties
  # Col 4: 1 if second policy won matchup, 0 if it lost, 0.5 for ties

# `policy_first_e` is the embedded data field which signifies which matchup in the sequence of 5 (for a given respondent) we're talking about. `policy_first` is the name of the first policy in that matchup.
D_efficacy <- D %>%
  pivot_longer(cols = matches("^efficacy[0-9]+_1$"),
               names_to = "policy_first_e",
               values_to = "policy_first")

# `policy_second` is the name of the second policy in the matchup. scoring follows recommended convention in the BradleyTaylor2 
D_efficacy <- D_efficacy %>%
  mutate(
    policy_second = case_when(
      policy_first_e == "efficacy1_1" ~ efficacy1_2,
      policy_first_e == "efficacy2_1" ~ efficacy2_2,
      policy_first_e == "efficacy3_1" ~ efficacy3_2,
      policy_first_e == "efficacy4_1" ~ efficacy4_2,
      policy_first_e == "efficacy5_1" ~ efficacy5_2
    ),
    outcome = case_when(
      policy_first_e == "efficacy1_1"  ~ Q11.1,
      policy_first_e == "efficacy2_1"  ~ Q11.3,
      policy_first_e == "efficacy3_1"  ~ Q11.5,
      policy_first_e == "efficacy4_1"  ~ Q11.7,
      policy_first_e == "efficacy5_1"  ~ Q11.9
    ),
    outcome = as.numeric(outcome),
    score_first = case_match(outcome, 
                             1 ~ 1,
                             2 ~ 0,
                             3 ~ 0.5),
    score_second = case_match(outcome, 
                             1 ~ 0,
                             2 ~ 1,
                             3 ~ 0.5),    
    .after = policy_first
  )

# drop rows with no outcome
D_efficacy <- D_efficacy %>%
  filter(!is.na(outcome))

# convert `policy_first` and `policy_second` into factor variables with the same levels
D_efficacy <- D_efficacy %>%
  mutate(
    across(c(policy_first, policy_second), 
          ~ factor(., levels = 
    c("Allow more market-rate apartments in existing neighborhoods",
      "Allow more market-rate homes on open land",
      "Allow more subsidized affordable apartments in existing neighborhoods",
      "Allow more subsidized affordable homes on open land",
      "Increase government spending on building affordable housing",
      "Reduce fees and taxes on housing development",
      "Limit how much landlords may increase rents",
      "Limit how much cities may increase property taxes",
      "Make cities approve housing proposals that comply with city rules",
      "Provide more housing vouchers (''Section 8'') to low-income people",
      "Give renters a tax break",
      "Give first-time homebuyers a no-interest government loan for their down payment",
      "Require housing developments to include affordable housing for middle-income people",
      "Require housing developments to include affordable housing for low-income people",
      "Restrict Wall Street investors from buying up homes",
      "Restrict development of market-rate housing on sites that could be developed for subsidized affordable housing in the future",
      "Reduce requirements for off-street parking in new developments")))) 

# add the `code` from the `issues` tibble for policy_first and policy_second. 
D_efficacy <- D_efficacy %>%
  left_join(issues %>% dplyr::select(lib_position, code), by = c("policy_first" = "lib_position")) %>%
  dplyr::rename(policy_first_code = code) %>%
  left_join(issues %>% dplyr::select(lib_position, code), by = c("policy_second" = "lib_position")) %>%
  dplyr::rename(policy_second_code = code)  
  
# convert policy_first_code and policy_second_code to factors
D_efficacy <- D_efficacy %>%
  mutate(
    across(c(policy_first_code, policy_second_code), 
          ~ factor(., levels = issues$code[1:17]))) # issues$code is ordered

# check to see whether any levels of policy_first_code and policy_second_code are missing data in the test data
levels(D_efficacy$policy_first_code)[!levels(D_efficacy$policy_first_code) %in% D_efficacy$policy_first_code]
levels(D_efficacy$policy_second_code)[!levels(D_efficacy$policy_second_code) %in% D_efficacy$policy_second_code]

```

#### Fit Models

As noted in @sec-strat-efficacy, we intend to report win rates as the primary measure of policy efficacy, because they are transparent and because our power analysis showed little if any gain from using a Bradley-Terry model instead. Standard errors are clustered on the respondent.

```{r}
#| label: functions-efficacy-models

# these functions require `data` in long format generated by the code chunk, @long-df-efficacy-models. 

# policy_1 and policy_2 are factor variables with the names of the first and second policies in a matchup, respectively. They must have the same levels.
# y1 is the score for policy_1 in a matchup, must be encoded 1 if it won the matchup, 0 if it lost, and 0.5 or NA if the matchup was a tie
# y2 is the score for policy_2 in a matchup, must be encoded 1 if this policy won the matchup, 0 if it lost, and 0.5 or NA if the matchup was a tie 
# id is a unique respondent-level identifier
# ref.cat is a reference category for fitting the BT model (the WR model doesn't use a reference category)

# (this code is probably clunkier than necessary)

fun_WR <- function(data, policy_1, policy_2, y1, y2, id, env = rlang::caller_env()) {

  # policy_1 wins
  temp1 <- data %>%
    dplyr::select(-c({{ policy_2 }}, {{ y2 }})) %>%
    dplyr::rename(policy = {{ policy_1 }},
                  y = {{ y1 }})

  # policy_2 wins
  temp2 <- data %>%
    dplyr::select(-c({{ policy_1 }}, {{ y1 }})) %>%
    dplyr::rename(policy = {{ policy_2 }},
                  y = {{ y2 }})

  D_winrate <- bind_rows(temp1, temp2)

    WR_env <- rlang::env(env, data = D_winrate)

    WR_call <- expr(
      lm_robust(y ~ policy - 1, # no intercept
                data = data,
                clusters = {{ id }}
                )
      )
    winrate_fit <- eval(WR_call, WR_env) %>%
      tidy(., conf.int = TRUE) %>%
      mutate(policy = str_replace(term, "^policy", ""),
           est_rank = rank(-estimate)
           ) %>%
      dplyr::select(-outcome) %>%
 #     dplyr::rename(winrate = estimate) # removed this renaming step for terminological consistency with other models

  return(winrate_fit)
}

# 2024.03.27. Minor correction: for purposes of illustrating group differences, it proved to be more transparent to plot just the differences in group judgments, rather than both group's separate judgments. However, our preanalysis plan code did not generate CIs on the group dfferences. Therefore, we added fun_WR_group() with a `group` argument to obtain CIs on the policy:group interaction.

fun_WR_group <- function(data, policy_1, policy_2, y1, y2, id, group, env = rlang::caller_env()) {
   # policy_1 wins
    temp1 <- data %>%
      dplyr::select(-c({{ policy_2 }}, {{ y2 }})) %>%
      dplyr::rename(policy = {{ policy_1 }},
                    y = {{ y1 }},
                    group = {{ group }})
    
    temp2 <- data %>%
      dplyr::select(-c({{ policy_1 }}, {{ y1 }})) %>%
      dplyr::rename(policy = {{ policy_2 }},
                    y = {{ y2 }},
                    group = {{ group }})   

    D_winrate <- bind_rows(temp1, temp2)
#    return(D_winrate)
    
      WR_env <- rlang::env(env, data = D_winrate)
  
      WR_call <- expr(
        lm_robust(y ~ policy + policy:group - 1, # no intercept
                  data = data,
                  clusters = {{ id }}
                  )
        )
      winrate_fit <- eval(WR_call, WR_env) %>%
        tidy(., conf.int = TRUE) %>%
        filter(str_detect(term, "group") & str_detect(term, "policy")) %>%
        mutate(policy = str_replace(str_extract(term, "^[^:]+"), "policy", ""),
               est_rank = rank(-estimate)
               )  
  return(winrate_fit)
}

fun_BT <- function(data, policy_1, policy_2, y1, y2, id, ref.cat, env = rlang::caller_env()) {
    policy_1 <- rlang::ensym(policy_1)
    policy_2 <- rlang::ensym(policy_2)
    y1 <- rlang::ensym(y1)
    y2 <- rlang::ensym(y2)
    id <- rlang::ensym(id)

  # fit BT model
  fitted_BT <- rlang::inject(
    BradleyTerry2::BTm(
      outcome = cbind(!!y1, !!y2), 
      player1 = !!policy_1,
      player2 = !!policy_2,
      refcat = !!ref.cat, 
      data = data)
    )

  #extract estimated abilities (lambda parameter) along with SEs and quasi SEs
  lambda_hats <- BradleyTerry2::qvcalc(BradleyTerry2::BTabilities(fitted_BT))$qvframe 

  lambda_hats <- lambda_hats %>% mutate(policy = row.names(lambda_hats))    

  return(lambda_hats)
}

```

```{r}
#| label: fit_perceived_efficacy_models
#| depends: long-df-efficacy-models, functions-efficacy-models
#| eval: true

winrate_full <- fun_WR(D_efficacy, 
                            policy_1 = policy_first_code, 
                            policy_2 = policy_second_code, 
                            y1 = score_first, 
                            y2 = score_second, 
                            id = ResponseId)

BT_full <- fun_BT(D_efficacy,
                  policy_1 = policy_first_code, 
                  policy_2 = policy_second_code, 
                  y1 = score_first, 
                  y2 = score_second,
                  id = ResponseId,
                  ref.cat = "Housing_Build_MR_Infill")
                  

efficacy_tenure <- D_efficacy %>%
  dplyr::filter(tenure %in% c("Owner", "Renter")) %>%
  tidyr::nest(data = -tenure) %>%
  mutate(winrates = purrr::map(data, ~ fun_WR(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId)),
         BTlambdas = purrr::map(data, ~ fun_BT(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId,
                                             ref.cat = "Housing_Build_MR_Infill"))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory
         

efficacy_want <- D_efficacy %>%
  mutate(want.price = case_when(
    want.price %in% c("Higher", "Same") ~ "Not Lower",
    TRUE ~ want.price)
  ) %>%
  dplyr::filter(!is.na(want.price)) %>%
  tidyr::nest(data = - want.price) %>%
  mutate(winrates = purrr::map(data, ~ fun_WR(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId)),
         BTlambdas = purrr::map(data, ~ fun_BT(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId,
                                             ref.cat = "Housing_Build_MR_Infill"))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

efficacy_pid3 <- D_efficacy %>%
  dplyr::filter(!is.na(pid3.wlean)) %>%
  tidyr::nest(data = - pid3.wlean) %>%
  mutate(winrates = purrr::map(data, ~ fun_WR(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId)),
         BTlambdas = purrr::map(data, ~ fun_BT(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId,
                                             ref.cat = "Housing_Build_MR_Infill"))
  ) %>%
  dplyr::select(-data) # removing data to conserve memory

efficacy_issue_pub_top3 <- D_efficacy %>%
  dplyr::filter(!is.na(imp.top3.housing)) %>%
  tidyr::nest(data = - imp.top3.housing) %>%
  mutate(winrates = purrr::map(data, ~ fun_WR(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId)),
         BTlambdas = purrr::map(data, ~ fun_BT(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId,
                                             ref.cat = "Housing_Build_MR_Infill"))
  ) %>%
  dplyr::select(-data) # removing data to conserve memory

# classification for this result is off-plan
efficacy_issue_pub_open <- D_efficacy %>%
  dplyr::filter(!is.na(imp.open.housing)) %>%
  tidyr::nest(data = - imp.open.housing) %>%
  mutate(winrates = purrr::map(data, ~ fun_WR(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId)),
         BTlambdas = purrr::map(data, ~ fun_BT(.x, 
                                             policy_1 = policy_first_code, 
                                             policy_2 = policy_second_code, 
                                             y1 = score_first, 
                                             y2 = score_second, 
                                             id = ResponseId,
                                             ref.cat = "Housing_Build_MR_Infill"))
  ) # %>%
#  dplyr::select(-data) # removing data to conserve memory


# save results for paper and future plotting.  

efficacy_fits <- list(
  full = list(winrate_full, BT_full),
  efficacy_tenure = efficacy_tenure,
  efficacy_want = efficacy_want,
  efficacy_pid3 = efficacy_pid3,
  efficacy_issue_pub_top3 = efficacy_issue_pub_top3, 
  efficacy_issue_pub_open = efficacy_issue_pub_open
  )

saveRDS(efficacy_fits, here::here("data_transformed", "PIPE_paper", "efficacy_fits.RData"))
```

```{r}
#| label: plot_wr_v_BTlambda

# This generates figures for SI Appendix F. The winrate figures for the main paper, which also portray policy preferences, are generated after the policy-preferences code chunks.

wr_bt_plot_fun <- function(winrates, lambdas, plot_title) {
  winrates %>%
    rename_with(~ paste0(., "_wr"), everything()) %>% # rename to distinguish columns from those in lambdas tibble
    inner_join(
      lambdas %>% rename_with(~ paste0(., "_bt"), everything()), 
      by = join_by(policy_wr == policy_bt)
      ) %>%  
    inner_join( # add policy_type from issues tibble, for color coding
      issues %>% dplyr::select(code, plot_shorthand, policy_type),
      by = join_by(policy_wr == code)
    ) %>%
   ggplot(aes(x=estimate_wr, 
             y=estimate_bt, color=policy_type, label=plot_shorthand)) +  
  coord_cartesian(xlim = c(0.10, 0.80), ylim = c(-1.1, 1.8)) +
  geom_errorbar(aes(xmin=conf.low_wr, xmax=conf.high_wr), width=.005) +
  geom_errorbar(aes(ymin=estimate_bt - 1.96*quasiSE_bt, ymax = estimate_bt + 1.96*quasiSE_bt), width=.005) +
#  geom_text_repel(aes(alpha = 0.1), force=25, size = 0.36*geom_text_text)+
  geom_point(size = 0.36*geom_dot_text) +
  geom_smooth(aes(x = estimate_wr, y = estimate_bt), method = "lm", se = FALSE, color = "darkgray") +  # Add linear regression line
  scale_x_continuous(labels=scales::percent, "") + 
  scale_y_continuous("") + 
  scale_color_manual("Policy Type", 
                   values = c("Supply Side - MR" = "#66A61E", 
                              "Supply Side - BMR" = "#E7298A", 
                              "Supply Side - Untargeted" = "#E6AB02", 
                              "Price Control" = "#7570B3", 
                              "Demand Subsidy" = "#A6761D", 
                              "Demand Fence" = "#1B9E77", 
                              "Social" = "#4393C3", 
                              "Economic" = "#B2ABD2")) +
    labs(title=paste0(plot_title))+  
    theme_bw() +
    theme(strip.background = element_blank(),
          legend.position = "bottom",
          #strip.text.y.left = element_text(angle = 90),
          panel.spacing = unit(0.0, "lines"),
          strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text*0.75),
          axis.title.x = element_text(size = axis_text*0.75),
          axis.title.y = element_text(size = axis_text*0.75), #legend.title=element_text(size=8),
          legend.title=element_text(size=graph_text*0.75),
          legend.text=element_text(size=graph_text*0.75),
          axis.text= element_text(size = axis_text*0.75),
          #axis.text.x= element_text(size = axis_text),
          #text=element_text(size=graph_text),
          plot.title = element_text(size=axis_text*0.75, hjust = 0.5)) +
  guides(alpha = "none")  # Remove alpha from the legend
}

wr_bt_full <- wr_bt_plot_fun(winrates = winrate_full, 
                             lambdas = BT_full, 
                             plot_title = "Full Sample")

wr_bt_homeowners <- wr_bt_plot_fun(winrates = efficacy_tenure$winrates[[2]], 
                                lambdas = efficacy_tenure$BTlambdas[[2]], 
                                plot_title = "Homeowners")

wr_bt_renters <- wr_bt_plot_fun(winrates = efficacy_tenure$winrates[[1]], 
                                lambdas = efficacy_tenure$BTlambdas[[1]], 
                                plot_title = "Renters")


wr_bt_wantslower <- wr_bt_plot_fun(winrates = efficacy_want$winrates[[1]], 
                                lambdas = efficacy_want$BTlambdas[[1]], 
                                plot_title = "Wants Lower Prices")

wr_bt_wantsnotlower <- wr_bt_plot_fun(winrates = efficacy_want$winrates[[2]], 
                                lambdas = efficacy_want$BTlambdas[[2]], 
                                plot_title = "Doesn't Want Lower Prices")

wr_bt_Dems <- wr_bt_plot_fun(winrates = efficacy_pid3$winrates[[1]], 
                                lambdas = efficacy_pid3$BTlambdas[[1]], 
                                plot_title = "Democrats")

wr_bt_Repubs <- wr_bt_plot_fun(winrates = efficacy_pid3$winrates[[2]], 
                                lambdas = efficacy_pid3$BTlambdas[[2]], 
                                plot_title = "Republicans")

wr_bt_top3_no <- wr_bt_plot_fun(winrates = efficacy_issue_pub_top3$winrates[[1]], 
                                lambdas = efficacy_issue_pub_top3$BTlambdas[[1]], 
                                plot_title = "Housing Isn't a Top-3 Concern")

wr_bt_top3_yes <- wr_bt_plot_fun(winrates = efficacy_issue_pub_top3$winrates[[2]], 
                                lambdas = efficacy_issue_pub_top3$BTlambdas[[2]], 
                                plot_title = "Housing Is a Top-3 Concern")

wr_bt_MII_no <- wr_bt_plot_fun(winrates = efficacy_issue_pub_open$winrates[[1]], 
                                lambdas = efficacy_issue_pub_open$BTlambdas[[1]], 
                                plot_title = "Housing Isn't Most Important Issue")

wr_bt_MII_yes <- wr_bt_plot_fun(winrates = efficacy_issue_pub_open$winrates[[2]], 
                                lambdas = efficacy_issue_pub_open$BTlambdas[[2]], 
                                plot_title = "Housing Is Most Important Issue")


testplot1 <- (wr_bt_full + plot_spacer()) / 
  (wr_bt_homeowners + wr_bt_renters) /
  (wr_bt_wantslower + wr_bt_wantsnotlower) +
  plot_layout(axis_titles = 'collect',
              guides = 'collect',
              ) & theme(legend.position = 'bottom') 

ggsave(testplot1, path=here::here("figures", "PIPE_paper"),
      file="fig_wr_BT_1.pdf", height=18, width=12) # SI Fig. F.1. 

testplot2 <- (wr_bt_Dems + wr_bt_Repubs) / 
  (wr_bt_top3_yes + wr_bt_top3_no) /
  (wr_bt_MII_yes + wr_bt_MII_no) +
  plot_layout(guides = 'collect',
              axes = 'collect') & theme(legend.position = 'bottom') 

ggsave(testplot2, path=here::here("figures", "PIPE_paper"),
      file="fig_wr_BT_2.pdf", height=18, width=12) # SI Fig. F.2. 

```

```{r}
#| label: perceived_efficacy_group_diff
#| depends: long-df-efficacy-models, functions-efficacy-models
#| eval: true

# This is the off-plan code that calculates standard errors on group differences. (Group contrasts were included in the registered design, but the PAPA did not include code to calculcate SEs on the group diffs.)

winrate_diff_tenure <- D_efficacy %>%
  dplyr::filter(tenure %in% c("Owner", "Renter")) %>%
  mutate(tenure = factor(tenure, levels = c("Owner", "Renter"))) %>%
  fun_WR_group(policy_1 = policy_first_code,
         policy_2 = policy_second_code, 
         y1 = score_first, 
         y2 = score_second, 
         id = ResponseId,
         group = tenure,
         data = .)
  
winrate_diff_want <- D_efficacy %>%
  mutate(
    want.price = case_when(
      want.price %in% c("Higher", "Same") ~ "Not Lower",
      TRUE ~ want.price),
    want.price = factor(want.price, levels = c("Not Lower", "Lower"))
    ) %>%
  fun_WR_group(policy_1 = policy_first_code,
         policy_2 = policy_second_code, 
         y1 = score_first, 
         y2 = score_second, 
         id = ResponseId,
         group = want.price,
         data = .)         

winrate_diff_pid <- D_efficacy %>%
  dplyr::filter(pid3.wlean %in% c("dem", "rep")) %>%
  mutate(pid = factor(pid3.wlean, levels = c("rep", "dem"))) %>%
  fun_WR_group(policy_1 = policy_first_code,
         policy_2 = policy_second_code, 
         y1 = score_first, 
         y2 = score_second, 
         id = ResponseId,
         group = pid,
         data = .)  

winrate_diff_issue_pub_top3 <- D_efficacy %>%
  dplyr::filter(!is.na(imp.top3.housing)) %>%
  mutate(imp.top3.housing = factor(as.logical(imp.top3.housing))) %>%
  fun_WR_group(policy_1 = policy_first_code,
         policy_2 = policy_second_code, 
         y1 = score_first, 
         y2 = score_second, 
         id = ResponseId,
         group = imp.top3.housing,
         data = .) 

# classification for this result is off-plan
winrate_diff_issue_pub_open <- D_efficacy %>%
  dplyr::filter(!is.na(imp.open.housing)) %>%
  mutate(imp.open.housing = factor(imp.open.housing)
         ) %>%
  fun_WR_group(policy_1 = policy_first_code,
         policy_2 = policy_second_code, 
         y1 = score_first, 
         y2 = score_second, 
         id = ResponseId,
         group = imp.open.housing,
         data = .) 

efficacy_group_diff_fits <- list( 
  winrate_diff_tenure = winrate_diff_tenure,
  winrate_diff_want = winrate_diff_want,
  winrate_diff_pid = winrate_diff_pid,
  winrate_diff_issue_pub_top3 = winrate_diff_issue_pub_top3, 
  winrate_diff_issue_pub_open = winrate_diff_issue_pub_open
  )

saveRDS(efficacy_group_diff_fits, here::here("data_transformed", "PIPE_paper", "efficacy_group_diff_fits.RData"))
```

#### Results

*This section includes text and some code for figures from the PAP which are not included in the paper. Subsequent to filing of the PAP, we realized that the perceived-efficacy and policy-preference results could be effectively conveyed in the same figure. We provide code for those figures later on, after generating sample-mean and split-sample summary statistics for policy preferences.*

To characterize perceptions of relative efficacy, we plot winrates and 95% confidence intervals (standard error clustered on the respondent) for each policy, and we also provide a heatmap showing the proportion of matchups in which a policy of type $i$ beats a policy of type $j$. As shown in @tbl-policies, the focal categories are: supply-side (market rate), supply side (subsidized affordable), demand subsidy, price controls, and policies to "fence out" other demanders.

Because the categorization of policies into types is somewhat subjective, and because we have only a few policies of each type, we can't make strong claims about public opinion by policy type. The heatmap results are only suggestive.

```{r mummolo nall heatmap bench-tester data, fig.cap="Head-To-Head Win Percentage by Policy Category. Simulated Data."}
#| echo: false
#| eval: false

# This figure, modeled on a prior paper by Nall, was included in the PAP but proved to be a less useful way of conveying information than what we settled on for the paper. 

#Create issue areas df

issue_topics <- data.frame(
  Policy = c(
    "Allow more market-rate apartments in existing neighborhoods",
    "Allow more market-rate homes on open land",
    "Allow more subsidized affordable apartments in existing neighborhoods",
    "Allow more subsidized affordable homes on open land",
    "Increase government spending on building affordable housing",
    "Reduce fees and taxes on housing development",
    "Limit how much landlords may increase rents",
    "Limit how much cities may increase property taxes",
    "Make cities approve housing proposals that comply with city rules",
    "Provide more housing vouchers (''Section 8'') to low-income people",
    "Give renters a tax break",
    "Give first-time homebuyers a no-interest government loan for their down payment",
    "Require housing developments to include affordable housing for middle-income people",
    "Require housing developments to include affordable housing for low-income people",
    "Restrict Wall Street investors from buying up homes",
    "Restrict development of market-rate housing on sites that could be developed for subsidized affordable housing in the future",
    "Reduce requirements for off-street parking in new developments"
  ),
  Type = c(
    "Supply Side - MR",
    "Supply Side - MR",
    "Supply Side - BMR",
    "Supply Side - BMR",
    "Supply Side - BMR",
    "Supply Side - Untargeted",
    "Price Control",
    "Price Control",
    "Supply Side - Untargeted",
    "Demand Subsidy",
    "Demand Subsidy",
    "Demand Subsidy",
    "Price Control",
    "Price Control",
    "Demand Fence ",
    "Price Control",
    "Supply Side - Untargeted"
  )
)

##Convert data to B-T model format as per Chris' code above. 
###

h2h <- D_efficacy %>% 
  ##Merge in policy group coding.
  inner_join(issue_topics, by=c("policy_first"="Policy")) %>% 
  rename("policy_first_type"="Type") %>% 
  inner_join(issue_topics, by=c("policy_second"="Policy")) %>% 
  rename("policy_second_type"="Type") %>% 
  ##Filter out ties
  filter(outcome != 3) %>% 
  ##Bin the wins by the policy type, not the specific policy
  mutate(winner_group = ifelse(outcome==1, as.character(policy_first_type), as.character(policy_second_type)),
         loser_group = ifelse(outcome==2, as.character(policy_first_type), as.character(policy_second_type))) %>% 
  dplyr::select(winner_group, loser_group) %>% mutate(count=1)
  
h2h_count <- h2h %>% group_by(winner_group, loser_group) %>% 
  summarise(total_wins=sum(count)) %>% 
  rowwise() %>% 
  mutate(matchup = paste(sort(c(winner_group, loser_group)), collapse = " vs "))

matches <- h2h_count %>% 
  rowwise() %>% 
  mutate(matchup = paste(sort(c(winner_group, loser_group)), collapse = " vs ")) %>% group_by(matchup) %>% 
  summarise(total_games=sum(total_wins))

total_h2h <- h2h_count %>% left_join(matches, by="matchup") %>% 
  mutate(win_pct = total_wins/ total_games) 
#This never happened
#%>% 
 # filter(winner_group != loser_group)

##Code will produce a plot for the pdf and also save a figure to the figures folder

ggplot(total_h2h, aes(x = loser_group, y = winner_group)) +
  geom_tile(aes(fill = win_pct)) +
  scale_fill_gradient(low = "white", high = "red", limits=c(0,1)) +
  geom_text(aes(label=round(win_pct,2)))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(fill = "Win Percentage", y = "Winner Group", x = "Loser Group") +
  geom_tile(data = subset(total_h2h, winner_group < loser_group), fill = "white") +
  geom_text(data = subset(total_h2h, winner_group < loser_group), aes(label = ""), color = "white") +
  theme(legend.position = 'none', text=element_text(size=graph_text))

##The plot is weird because the simulated data doesn't have every policy going against every other policy. But this code is doing the same thing as the heatmap from the SS3 data so it's right. 


ggsave(path=here::here("figures", "PIPE_paper"), file="nm_heat_map_head_to_head.pdf", height=10, width=16)


```

Similarly, Figure @fig-win-rate-bench-tester-data shows win rates of each policy category in the five pairwise efficacy questions each respondent will be presented with. As with the heatmap, the subjective categorization means that these win rates are again merely suggestive.

```{r win-rate-bench-tester-data}
#| echo: false
#| eval: false
#| label: fig-efficacy-by-policy
#| fig-cap: "Pairwise Win Percentage by Policy. Bench Tester Data."


##use D_efficacy 
###This is finding winners by policy. 
win_rate_group <- sim_compare_columns(D_efficacy) %>% 
  sim_win_rate_cluster()


sim_wp <- win_rate_group %>% 
  lm_robust(y ~ Type-1, data=., clusters=id) %>% 
  tidy(.,conf.int=T) %>% 
  mutate(term=str_replace(term, "^Type",""),
         est_rank=rank(-estimate)) 

###Reorder the policy areas based on win percentage

sim_order <- sim_wp %>% 
  arrange(desc(estimate))


sim_wp %>% 
  mutate(term=factor(term, levels=sim_order$term)) %>% 
  ggplot(aes(x=reorder(str_wrap(term,width=70),estimate), y=estimate, group=term, color=term)) +   geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=.4) +
  geom_point() +
  scale_x_discrete("") + 
  scale_y_continuous("Win Percentage", label=scales::percent) + 
  scale_fill_brewer("Policy Type", palette = "Paired") +
  coord_flip() +
  geom_hline(yintercept=.5, linetype="dashed")+
  labs(title="Overall Policy Win Percentage by Category") +
  theme_bw() +
  theme(strip.background = element_blank(),
        legend.position = "none",
        #strip.text.y.left = element_text(angle = 90),
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        #axis.title.x = element_text(size = 8),
        #axis.title.y = element_text(size = 8), #legend.title=element_text(size=8),
        legend.title=element_text(size=main_text),
        legend.text=element_text(size=,main_text),
        axis.text.y= element_text(size = axis_text),
        #axis.text.x= element_text(size = axis_text),
        #text=element_text(size=graph_text),
        plot.title = element_text(size=main_text, hjust = 0.5))

ggsave(path=here::here("figures","PIPE_paper"), file="sim_win_percentages_group.pdf", height=10, width=16)


```

## Issue Preferences

This section provides analysis code for the paper but no formal power analysis. Assuming that the some of the policies have high levels of public support and others much less support, as our pilot suggets (Appendix), we will have no trouble detecting the overall pattern of support.

Each respondent states their preference (endorsing one of two positions) on a random subset of 10 of the 39 issues. In expectation, then, we will have $N*10/39$ observations for each issue. Confidence intervals will therefore average +/- $1.96*(sqrt(p*(1-p)/(N*10/39)))$. With a sample size of 5000 and a "tough" issue in which the probability of endorsing either position is close to 0.5, the 95% confidence interval on the proportion of the population that favors either the liberal position or the conservative positon would be less than +/- 0.03. Within a subgroup comprising just 30% of the sample, the CI would be +/- 0.05.

### Code for Paper

The next code chunk transforms that data into a tibble with one row per policy position per respondent. The dependent variable, $y$, is support for the "lib_position" on that issue. (See @tbl-policies for the full set of policies and the assigned "lib_position" for each. For non-housing policies, the lib_position is the position generally preferred by liberals / Democrats and disfavored by conservatives / Republicans. For housing policies, the lib_position column generally has the policies to "do more" about the problem, whether by increasing production, controlling prices, or otherwise.[^5].)

[^5]: The assignment of policies to column "lib_position" or "con_position" is ultimately arbitrary, but we need to make an assignment for purposes of the analysis

First we organize the data as follows:

-   Col 1: name of first issue position in a question
-   Col 2: name of second issue position in the question (always the negative of the position)
-   Col 3: 1 if first position was chosen, 0 if it lost, 0.5 for ties
-   Col 4: 1 if second position was chosen, 0 if it lost, 0.5 for ties

Finally we split and rowbind the data so that there's one observation per position, subset to the "lib_position,"[^6] and fit a linear model with SEs clustered on the respondent identifier.

[^6]: This subsetting is just a convenience. There's no additional information in the other half of the data, since supporting (opposing) the lib_position is equivalent to opposing (supporting) the con_position.

```{r}
#| label: issue-pref-long-df 

# NB: `position_first_e` is the embedded data field which signifies which matchup in the sequence of 5 (for a given respondent) we're talking about. `policy_first` is the name of the first policy in that matchup.

# CE 2024.02.11. Probably should recode this so that 0.5 is first position winning, -0.5 is second position willing, and ties are 0. For consistency with the conjoint encoding.

D_issues <- D %>%
  pivot_longer(cols = matches("^i_position_A_\\d{1,2}$"),
               names_to = "position_first_e",
               values_to = "position_first")

# `position_second` is the name of the second (opposing) issue position in the matchup.  
D_issues <- D_issues %>%
  mutate(
    position_second = case_when(
      position_first_e == "i_position_A_1" ~ i_position_B_1,
      position_first_e == "i_position_A_2" ~ i_position_B_2,
      position_first_e == "i_position_A_3" ~ i_position_B_3,
      position_first_e == "i_position_A_4" ~ i_position_B_4,
      position_first_e == "i_position_A_5" ~ i_position_B_5,
      position_first_e == "i_position_A_6" ~ i_position_B_6,
      position_first_e == "i_position_A_7" ~ i_position_B_7,
      position_first_e == "i_position_A_8" ~ i_position_B_8,
      position_first_e == "i_position_A_9" ~ i_position_B_9,
      position_first_e == "i_position_A_10" ~ i_position_B_10
    ),
    outcome = case_when(
      position_first_e == "i_position_A_1" ~ Q12.2,
      position_first_e == "i_position_A_2" ~ Q12.4,
      position_first_e == "i_position_A_3" ~ Q12.6,
      position_first_e == "i_position_A_4" ~ Q12.8,
      position_first_e == "i_position_A_5" ~ Q12.10,
      position_first_e == "i_position_A_6" ~ Q12.12,
      position_first_e == "i_position_A_7" ~ Q12.14,
      position_first_e == "i_position_A_8" ~ Q12.16,
      position_first_e == "i_position_A_9" ~ Q12.18,
      position_first_e == "i_position_A_10" ~ Q12.20
    ),
    outcome = as.numeric(outcome),
    score_first = case_match(outcome, 
                             1 ~ 1,
                             2 ~ 0,
                             3 ~ 0.5),
    score_second = case_match(outcome, 
                             1 ~ 0,
                             2 ~ 1,
                             3 ~ 0.5),    
    .after = position_first
  )

# drop rows with no outcome
D_issues <- D_issues %>%
  filter(!is.na(outcome))

# add `code` from issues tibble for the issue.  
D_issues <- D_issues %>%
  left_join(issues %>% dplyr::select(lib_position, code, plot_shorthand), by = c("position_first" = "lib_position")) %>%
  dplyr::rename(code1 = code, 
                plot_shorthand1 = plot_shorthand) %>%
  left_join(issues %>% dplyr::select(lib_position, code, plot_shorthand), by = c("position_second" = "lib_position")) %>%
  dplyr::rename(code2 = code,
                plot_shorthand2 = plot_shorthand) %>%
  mutate(code = coalesce(code1, code2),
         plot_shorthand = coalesce(plot_shorthand1, plot_shorthand2))

# convert position_first_code and position_second_code to factors, using sorted ordering from `issues` tibble
D_issues <- D_issues %>%
  mutate(plot_shorthand = factor(plot_shorthand, levels = issues$plot_shorthand),
         code = factor(code, levels = issues$code))

# Now split and rowbind the data so that there's one observation per issue, subset to the "lib_position" from the issues df, setting up fit of linear model with SEs clustered on respondent. 

# position A wins
temp1 <- D_issues %>%
  dplyr::select(-c(position_second, score_second)) %>%
  dplyr::rename(position = position_first,
                y = score_first)
# position B wins
temp2 <- D_issues %>%
  dplyr::select(-c(position_first, score_first)) %>%
  dplyr::rename(position = position_second,
                y = score_second)

D_issues <- bind_rows(temp1, temp2) %>%
  dplyr::filter(position %in% issues$lib_position) %>%
  dplyr::select(-outcome)


```

```{r}
#| label: issue_pref_functions
#| depends: issue-pref-long-df

# NB: In the code uploaded as the preanalysis plan, there was a minor error which resulted in all models being fit on the full sample, rather than the target subsets. That error has been corrected.

# We have also added, post-PAP, code which will allow future replicators to depart from our original scheme for handling DKs. We  treated them in the PAP as ambivalent responses, coding them as 0.5 or, equivalently, indifferent between the lib_position and the con_position on each issue. This was tantamount to assigning DK respondents with p = 0.5 to the lib_position or the con_position for purposes of ranking policies by support for lib_position. In keeping with our preanalysis plan, the figures in the paper use that encoding of DKs. 

# We also added, post-PAP, a function that returns standard errors on the group-mean differences.

# fun_IP() is the function in our PAP
fun_IP <- function(data, position, y, id, env = rlang::caller_env()) {
    y <- rlang::ensym(y)
    position <- rlang::ensym(position)
    id <- rlang::ensym(id)
    
    rlang::inject( lm_robust(!!y ~ !!position - 1, data=data, clusters = !!id ) )%>%
      tidy(., conf.int = TRUE) %>%
      mutate(position = str_replace(term, "^position", ""),
           est_rank = rank(-estimate)
           ) # %>%
#      dplyr::select(-c(term, outcome)) %>%  # minor off-plan modification for consistency in labeling across functions. Stick with tidy() defaults.
#      dplyr::rename(support = estimate)
}

# fun_IP_2() is the post-PAP variation on fun_IP(), and returns the number of lib_position, con_position, and DK responses on each issue. It also allows user to toggle how DK responses are treated for purposes of estimating support and ranking policies by support for the lib_position. DK = "indifference" is the original solution, counting DKs as indifferent responses. DK = "drop" treats the DKs and NAs. DK = "in_denominator" returns an estimate of support for lib_position as share of total responses: lib, con and DK.
fun_IP_2 <- function(data, code, y, DK = "indifference", id, env = rlang::caller_env()) { # switching position to code is immaterial but facilitates later join
    y <- rlang::ensym(y)
    code <- rlang::ensym(code)
    id <- rlang::ensym(id)
  
  if(DK == "in_denominator"){
    results.df <- rlang::inject( lm_robust(!!y == 1 ~ !!code - 1, data=data, clusters = !!id ) )%>% # DV now is indicator for whether respondent chose the lib_position
      tidy(., conf.int = TRUE) %>%
      mutate(code = str_replace(term, "^code", ""),
           est_rank = rank(-estimate)
           )  
  }
    
  if(DK == "indifference"){
    results.df <- rlang::inject( lm_robust(!!y ~ !!code - 1, data=data, clusters = !!id ) ) %>%  
      tidy(., conf.int = TRUE) %>%
      mutate(code = str_replace(term, "^code", ""),
           est_rank = rank(-estimate)
           )  
  }
    
  if(DK == "drop"){
    data_filtered <- dplyr::filter(data, y != 0.5)
    results.df <- rlang::inject( lm_robust(!!y ~ !!code - 1, data=data_filtered, clusters = !!id ) ) %>%  
      tidy(., conf.int = TRUE) %>%
      mutate(code = str_replace(term, "^code", ""),
           est_rank = rank(-estimate)
           )  
  }
    
    counts <- data %>% 
      group_by(code, y) %>% 
      summarize(n = n()) %>%
      mutate(rf = n / sum(n)) %>%
      dplyr::select(-n) %>%
      pivot_wider(names_from = y, values_from = rf, names_glue = "y_{y}") %>% 
      ungroup() %>%
      rename(rf_con_position = y_0, rf_DK = y_0.5, rf_lib_position = y_1)
    
    results.df <- left_join(results.df, counts) %>%
      dplyr::rename(policy = code) %>% # policy is the name used in perceived-efficacy output
      mutate(term = str_replace(term, "code", "policy")) # for consistency with efficacy output
    
    return(results.df)
}

# fun_IP_2_group() is a post-PAP variation on fun_IP_2() that returns group differences in support with SEs on the diff, but no counts. 
fun_IP_2_group <- function(data, code, y, group, DK = "indifference", id, env = rlang::caller_env()) { # switching position to code is immaterial but facilitates later join
    y <- rlang::ensym(y)
    code <- rlang::ensym(code)
    group <- rlang::ensym(group)
    id <- rlang::ensym(id)
  
  if(DK == "in_denominator"){
    results.df <- rlang::inject( lm_robust(!!y == 1 ~ !!code + !!code:!!group - 1, 
                                           data=data, 
                                           clusters = !!id ) )%>% # DV  is indicator for whether respondent chose the lib_position
      tidy(., conf.int = TRUE) %>%
      mutate(term = str_replace(term, "^code", "policy"),
           ) 
  }
    
  if(DK == "indifference"){
    results.df <- rlang::inject( lm_robust(!!y ~ !!code + !!code:!!group - 1, 
                                           data=data, 
                                           clusters = !!id ) ) %>% 
      tidy(., conf.int = TRUE) %>%
      mutate(term = str_replace(term, "^code", "policy")
           )  
  }
    
  if(DK == "drop"){
    data_filtered <- dplyr::filter(data, y != 0.5)
    results.df <- rlang::inject( lm_robust(!!y ~ !!code + !!code:!!group - 1, 
                                           data=data_filtered, 
                                           clusters = !!id ) )%>% # DV  is indicator for whether respondent chose the lib_position
      tidy(., conf.int = TRUE) %>%
      mutate(term = str_replace(term, "^code", "policy")
           )  
  }
    
    results.df <- results.df %>% # for consistency with efficacy output
      filter(str_detect(term, ":")) %>% # retain just the interaction term
  #    dplyr::rename(policy = code) %>%
      mutate(policy = str_replace(str_extract(term, "^[^:]+"), "policy", ""),
         est_rank = rank(-estimate)
         )  
    return(results.df)
}

```

```{r}
#| label: fit_issue_position_models_PAP
#| depends: issue_pref_functions

# this is the issue-positions code from our PAP. 

positions_full <- fun_IP(data = D_issues, 
                         position = position,
                         y = y,
                         id = ResponseId)


positions_tenure <- D_issues %>%
  dplyr::filter(tenure %in% c("Owner", "Renter")) %>%
  tidyr::nest(data = -tenure) %>%
  mutate(positions = purrr::map(data, ~ fun_IP(., 
                         position = position,
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_want <- D_issues %>%
  mutate(want.price = case_when(
    want.price %in% c("Higher", "Same") ~ "Not Lower",
    TRUE ~ want.price)
  ) %>%
  dplyr::filter(!is.na(want.price)) %>%
  tidyr::nest(data = - want.price) %>%
  mutate(positions = purrr::map(data, ~ fun_IP(., 
                         position = position,
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_pid <- D_issues %>%
  dplyr::filter(!is.na(pid3.wlean)) %>%
  tidyr::nest(data = - pid3.wlean) %>%
  mutate(positions = purrr::map(data, ~ fun_IP(., 
                         position = position,
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_issue_pub_top3 <- D_issues %>%
  dplyr::filter(!is.na(imp.top3.housing)) %>%
  tidyr::nest(data = - imp.top3.housing) %>%
  mutate(positions = purrr::map(data, ~ fun_IP(., 
                         position = position,
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

# coding of the free-text responses is off-plan
positions_issue_pub_open <- D_issues %>%
  dplyr::filter(!is.na(imp.open.housing)) %>%
  tidyr::nest(data = - imp.open.housing) %>%
  mutate(positions = purrr::map(data, ~ fun_IP(., 
                         position = position,
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

# (plot nicely)
# save results for paper and future plotting

position_fits <- list(
  positions_full = positions_full,
  positions_tenure = positions_tenure,
  positions_want = positions_want,
  positions_pid = positions_pid,
  positions_issue_pub_top3 = positions_issue_pub_top3,
  positions_issue_pub_open = positions_issue_pub_open
  )

 saveRDS(position_fits, here::here("data_transformed", "PIPE_paper", "position_fits.RData"))
 rm(position_fits)
 
 
```

```{r}
#| label: fit_issue_position_models_supp
#| depends: issue_pref_functions

# This returns supplemental fits, specifically (1) relative frequencies of preferences on each policy pair plus standard errors on the proportion who support "lib_position" (putting NAs in the denominator rather than assigning them to each side with equal probability); (2) group differences in support with SEs on the diff. It is used for figures in the SI.

positions_full_2 <- fun_IP_2(data = D_issues, 
                         code = code,
                         DK = "in_denominator",
                         y = y,
                         id = ResponseId)
         
positions_tenure_2 <- D_issues %>%
  dplyr::filter(tenure %in% c("Owner", "Renter")) %>%
  tidyr::nest(data = -tenure) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code,
                         DK = "in_denominator",
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

# for group-diff results, using default classification of DKs (DK = "indifference"), which is what we specified in PAP
positions_tenure_diff <- D_issues %>%
  dplyr::filter(tenure %in% c("Owner", "Renter")) %>%
  mutate(tenure = factor(tenure, levels = c("Owner", "Renter"))) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = tenure,
                 id = ResponseId)

positions_want_2 <- D_issues %>%
  mutate(want.price = case_when(
    want.price %in% c("Higher", "Same") ~ "Not Lower",
    TRUE ~ want.price)
  ) %>%
  dplyr::filter(!is.na(want.price)) %>%
  tidyr::nest(data = - want.price) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code,
                         DK = "in_denominator",
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_want_diff <- D_issues %>%
  mutate(
    want.price = case_when(
      want.price %in% c("Higher", "Same") ~ "Not Lower",
      TRUE ~ want.price),
    want.price = factor(want.price, levels = c("Not Lower", "Lower"))
    ) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = want.price,
                 id = ResponseId)

positions_pid_2 <- D_issues %>%
  dplyr::filter(!is.na(pid3.wlean)) %>%
  tidyr::nest(data = - pid3.wlean) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code,
                         DK = "in_denominator",
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_pid_diff <- D_issues %>% 
  dplyr::filter(pid3.wlean %in% c("dem", "rep")) %>%
  mutate(pid = factor(pid3.wlean, levels = c("rep", "dem"))) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = pid,
                 id = ResponseId)

positions_issue_pub_top3_2 <- D_issues %>%
  dplyr::filter(!is.na(imp.top3.housing)) %>%
  mutate(imp.top3.housing = factor(as.logical(imp.top3.housing))) %>%
  tidyr::nest(data = - imp.top3.housing) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code,
                         DK = "in_denominator",
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_issue_pub_top3_diff <- D_issues %>% 
  dplyr::filter(!is.na(imp.top3.housing)) %>%
  mutate(imp.top3.housing = factor(as.logical(imp.top3.housing))) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = imp.top3.housing,
                 id = ResponseId)

positions_issue_pub_open_2 <- D_issues %>%
  dplyr::filter(!is.na(imp.open.housing)) %>%
  mutate(imp.open.housing = factor(imp.open.housing)
         ) %>%
  tidyr::nest(data = - imp.open.housing) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code,
                         DK = "in_denominator",
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_issue_pub_open_diff <- D_issues %>%
  dplyr::filter(!is.na(imp.open.housing)) %>%
  mutate(imp.open.housing = factor(imp.open.housing)
         ) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = imp.open.housing,
                 id = ResponseId)

# (plot nicely)

# save results for paper and future plotting

position_fits_2 <- list(
  positions_full_2 = positions_full_2,
  positions_tenure_2 = positions_tenure_2,
  positions_tenure_diff = positions_tenure_diff,
  positions_want_2 = positions_want_2,
  positions_want_diff = positions_want_diff,
  positions_pid_2 = positions_pid_2,
  positions_pid_diff = positions_pid_diff,
  positions_issue_pub_top3_2 = positions_issue_pub_top3_2,
  positions_issue_pub_top3_diff = positions_issue_pub_top3_diff,
  positions_issue_pub_open_2 = positions_issue_pub_open_2,
  positions_issue_pub_open_diff = positions_issue_pub_open_diff
  )
 
saveRDS(position_fits_2, here::here("data_transformed", "PIPE_paper", "position_fits_2.RData"))
# rm(position_fits_2)

```

```{r}
#| label: fit_issue_position_models_PAP_supp
#| depends: issue_pref_functions

# This returns same fit as the PAP (coding DK as indifference b/t the two positions, the default), but with supplemental info about relative frequencies of preferences plus group differences in policies preferences with SEs on the diff. It is used for the perceived efficacy x support figures in the main paper and the SI.

positions_full_3 <- fun_IP_2(data = D_issues, 
                         code = code, 
                         y = y,
                         id = ResponseId)
         
positions_tenure_3 <- D_issues %>%
  dplyr::filter(tenure %in% c("Owner", "Renter")) %>%
  tidyr::nest(data = -tenure) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code, 
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

# for group-diff results, using default classification of DKs (DK = "indifference"), which is what we specified in PAP
positions_tenure_diff_3 <- D_issues %>%
  dplyr::filter(tenure %in% c("Owner", "Renter")) %>%
  mutate(tenure = factor(tenure, levels = c("Owner", "Renter"))) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = tenure,
                 id = ResponseId)

positions_want_3 <- D_issues %>%
  mutate(want.price = case_when(
    want.price %in% c("Higher", "Same") ~ "Not Lower",
    TRUE ~ want.price)
  ) %>%
  dplyr::filter(!is.na(want.price)) %>%
  tidyr::nest(data = - want.price) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code, 
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_want_diff_3 <- D_issues %>%
  mutate(
    want.price = case_when(
      want.price %in% c("Higher", "Same") ~ "Not Lower",
      TRUE ~ want.price),
    want.price = factor(want.price, levels = c("Not Lower", "Lower"))
    ) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = want.price,
                 id = ResponseId)

positions_pid_3 <- D_issues %>%
  dplyr::filter(!is.na(pid3.wlean)) %>%
  tidyr::nest(data = - pid3.wlean) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code, 
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_pid_diff_3 <- D_issues %>% 
  dplyr::filter(pid3.wlean %in% c("dem", "rep")) %>%
  mutate(pid = factor(pid3.wlean, levels = c("rep", "dem"))) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = pid,
                 id = ResponseId)

positions_issue_pub_top3_3 <- D_issues %>%
  dplyr::filter(!is.na(imp.top3.housing)) %>%
  mutate(imp.top3.housing = factor(as.logical(imp.top3.housing))) %>%
  tidyr::nest(data = - imp.top3.housing) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code, 
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_issue_pub_top3_diff_3 <- D_issues %>% 
  dplyr::filter(!is.na(imp.top3.housing)) %>%
  mutate(imp.top3.housing = factor(as.logical(imp.top3.housing))) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = imp.top3.housing,
                 id = ResponseId)

positions_issue_pub_open_3 <- D_issues %>%
  dplyr::filter(!is.na(imp.open.housing)) %>%
  mutate(imp.open.housing = factor(imp.open.housing)
         ) %>%
  tidyr::nest(data = - imp.open.housing) %>%
  mutate(positions = purrr::map(data, ~ fun_IP_2(., 
                         code = code, 
                         y = y,
                         id = ResponseId))
         ) %>%
  dplyr::select(-data) # removing data to conserve memory

positions_issue_pub_open_diff_3 <- D_issues %>%
  dplyr::filter(!is.na(imp.open.housing)) %>%
  mutate(imp.open.housing = factor(imp.open.housing)
         ) %>%
  fun_IP_2_group(data = ., 
                 code = code,
                 y = y,
                 group = imp.open.housing,
                 id = ResponseId)

# (plot nicely)

# save results for paper and future plotting

position_fits_3 <- list(
  positions_full_3 = positions_full_3,
  positions_tenure_3 = positions_tenure_3,
  positions_tenure_diff_3 = positions_tenure_diff_3,
  positions_want_3 = positions_want_3,
  positions_want_diff_3 = positions_want_diff_3,
  positions_pid_3 = positions_pid_3,
  positions_pid_diff_3 = positions_pid_diff_3,
  positions_issue_pub_top3_3 = positions_issue_pub_top3_3,
  positions_issue_pub_top3_diff_3 = positions_issue_pub_top3_diff_3,
  positions_issue_pub_open_3 = positions_issue_pub_open_3,
  positions_issue_pub_open_diff_3 = positions_issue_pub_open_diff_3
  )
 
saveRDS(position_fits_3, here::here("data_transformed", "PIPE_paper", "position_fits_3.RData"))
 

```

```{r}
#| label: fig_policy_support_efficacy

# This is Fig 4.1.
# On the support dimension, we are reporting support with DKs coded as indifferent, as specified in PAP

winrate_full %>%
  rename_with(~ paste0(., "_wr"), everything()) %>% # rename to distinguish columns from those in positions tibble
  inner_join(
    positions_full_3 %>% rename_with(~ paste0(., "_supp"), everything()), 
    by = join_by(policy_wr == policy_supp)
    ) %>%  
  inner_join( # add plot_shorthand and policy_type for color coding from issues tibble 
    issues %>% dplyr::select(code, plot_shorthand, policy_type),
    by = join_by(policy_wr == code)
  ) %>%
  ggplot(aes(x=estimate_supp, 
             y=estimate_wr, color=policy_type, label=plot_shorthand)) +   
  geom_errorbar(aes(ymin=conf.low_wr, ymax=conf.high_wr), width=.005) +
  geom_errorbar(aes(xmin=conf.low_supp, xmax=conf.high_supp), width=.005) +
  geom_text_repel(force=25, size = 0.36*geom_text_text)+
  geom_point(size = 0.36*geom_dot_text) +
  scale_x_continuous(labels=scales::percent, "Support (vs. opposite policy)") + 
  scale_y_continuous(labels=scales::percent, "Perceived Efficacy (vs. random policy)") + 
  scale_color_manual("Policy Type", 
                   values = c("Supply Side - MR" = "#66A61E", 
                              "Supply Side - BMR" = "#E7298A", 
                              "Supply Side - Untargeted" = "#E6AB02", 
                              "Price Control" = "#7570B3", 
                              "Demand Subsidy" = "#A6761D", 
                              "Demand Fence" = "#1B9E77", 
                              "Social" = "#4393C3", 
                              "Economic" = "#B2ABD2")) +
  labs(title="") +
  theme_bw() +
  theme(strip.background = element_blank(),
        legend.position = "bottom",
        #strip.text.y.left = element_text(angle = 90),
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        axis.title.x = element_text(size = axis_text),
        axis.title.y = element_text(size = axis_text), #legend.title=element_text(size=8),
        legend.title=element_text(size=graph_text),
        legend.text=element_text(size=graph_text),
        axis.text= element_text(size = axis_text),
        #axis.text.x= element_text(size = axis_text),
        #text=element_text(size=graph_text),
        plot.title = element_text(size=main_text, hjust = 0.5))

ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_efficacy.pdf", height=10, width=16)

```

```{r}
#| label: fig_policy_support_efficacy_groups

# master plot function 

wr_supp_diff_plot <- function(winrate_diff, support_diff, plot_title) {
  winrate_diff %>%
    rename_with(~ paste0(., "_wr"), everything()) %>% # rename to distinguish columns from those in positions tibble
    inner_join(
      support_diff %>% rename_with(~ paste0(., "_supp"), everything()), 
      by = join_by(policy_wr == policy_supp)
      ) %>%  
    inner_join( # add policy_type from issues tibble, for color coding
      issues %>% dplyr::select(code, plot_shorthand, policy_type),
      by = join_by(policy_wr == code)
    ) %>%
    ggplot(aes(x=estimate_supp, 
               y=estimate_wr, color=policy_type, label=plot_shorthand)) +   
    coord_cartesian(xlim = c(-0.4, 0.4), ylim = c(-0.15, 0.15)) +
    geom_errorbar(aes(ymin=conf.low_wr, ymax=conf.high_wr), width=.005) +
    geom_errorbar(aes(xmin=conf.low_supp, xmax=conf.high_supp), width=.005) +
    geom_text_repel(force=25, size = 0.36*geom_text_text)+
    geom_point(size = 0.36*geom_dot_text) +
    geom_vline(xintercept = 0, color = "red", alpha = 0.1, linewidth = 1) +   
    geom_hline(yintercept = 0, color = "red", alpha = 0.1, linewidth = 1) +
    scale_x_continuous(labels=scales::percent, "Support (vs. opposite policy)") + 
    scale_y_continuous(labels=scales::percent, "Perceived Efficacy (vs. random policy)") + 
    scale_color_manual("Policy Type", 
                     values = c("Supply Side - MR" = "#66A61E", 
                                "Supply Side - BMR" = "#E7298A", 
                                "Supply Side - Untargeted" = "#E6AB02", 
                                "Price Control" = "#7570B3", 
                                "Demand Subsidy" = "#A6761D", 
                                "Demand Fence" = "#1B9E77", 
                                "Social" = "#4393C3", 
                                "Economic" = "#B2ABD2")) +
    labs(title=paste0(plot_title))+ # CE: removed "Policy Efficacy and Support Correspondence:"
    theme_bw() +
    theme(strip.background = element_blank(),
          legend.position = "bottom",
          #strip.text.y.left = element_text(angle = 90),
          panel.spacing = unit(0.0, "lines"),
          strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
          axis.title.x = element_text(size = axis_text),
          axis.title.y = element_text(size = axis_text), #legend.title=element_text(size=8),
          legend.title=element_text(size=graph_text),
          legend.text=element_text(size=graph_text),
          axis.text= element_text(size = axis_text),
          #axis.text.x= element_text(size = axis_text),
          #text=element_text(size=graph_text),
          plot.title = element_text(size=main_text, hjust = 0.5))
}

# tenure plot. This is Fig. 4.2.
wr_supp_diff_plot(winrate_diff = winrate_diff_tenure,
                  support_diff = positions_tenure_diff_3,
                  plot_title = "Renters Minus Homeowners")

ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_efficacy_tenure.pdf", height=10, width=16)

# wants-lower-prices plot. SI Fig. C.1.
wr_supp_diff_plot(winrate_diff = winrate_diff_want,
                  support_diff = positions_want_diff_3,
                  plot_title = "Wants Lower Prices Minus Doesn't Want Lower Prices")

ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_efficacy_want.pdf", height=10, width=16)

# pid plot. This is Fig. 4.2.
wr_supp_diff_plot(winrate_diff = winrate_diff_pid,
                  support_diff = positions_pid_diff_3,
                  plot_title = "Democrats Minus Republicans")

ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_efficacy_pid.pdf", height=10, width=16)

# issue public (closed) plot. SI Fig. C.2.
wr_supp_diff_plot(winrate_diff = winrate_diff_issue_pub_top3,
                  support_diff = positions_issue_pub_top3_diff_3,
                  plot_title = "Housing Issue Public (Top 3) Minus Others")
ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_efficacy_top3.pdf", height=10, width=16)


# issue public (free-text) plot. SI Fig. C.3.
wr_supp_diff_plot(winrate_diff = winrate_diff_issue_pub_open,
                  support_diff = positions_issue_pub_open_diff_3,
                  plot_title = "Housing Issue Public (Free-Text) Minus Others")
ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_efficacy_MIP.pdf", height=10, width=16)

```

```{r}
#| label: issue_position_all_responses
#| eval: false

# This is SI Fig. D.1. It shows the full distribution of responses to the issue-position question, rather than assigning DKs to support/oppose with p = 0.5. CIs are on proportion of respondents who support 'lib_position'.

# prepare data and facet labels
plot_D <- positions_full_2 %>% 
  left_join(issues %>% dplyr::select(plot_shorthand, code, lib_position, con_position), by = c("policy" = "code")) %>%
  mutate(facet_label = lib_position) # I tried to fit lib_position and con_position into the label but just couldn't do it.

plot_D %>%
  mutate(
    net_support = rf_lib_position - rf_con_position, 
    facet_label = fct_reorder(facet_label, - net_support), # this orders facets in this plot
    constant = "k" # arbitrary column held constant for all respondents; added bc geom_bar demands an x aesthetic and I don't want to subset the data within facets
    ) %>% 
  tidyr::pivot_longer(
    cols = starts_with("rf"),
    names_to = "position",
    values_to = "rf") %>%
  ggplot(aes(x = constant, y = rf, fill = position)) +
  geom_bar(stat = "identity", width = 0.8,  alpha = 0.6) +
  geom_errorbar(aes(ymax = conf.high, ymin = conf.low), width = 0.1) +
  scale_y_continuous(labels=scales::percent, expand=c(0,0), breaks = seq(0, 1, by = 0.2)) + 
  scale_fill_manual(values = c("red", "lightgrey", "blue")) +
     theme(strip.background = element_blank(),
           legend.position = "none",
           plot.margin = ggplot2::margin(0.5, 0.5, 0.5, 0.5, "cm"),  
           axis.text.x = element_text(size = 8), # this is the printed x axis after coor_flip() below
           axis.text.y = element_blank(),
           strip.text = element_text(size = 7, hjust = 0, vjust = 0),
           panel.spacing.x = unit(0.5, "lines"),
           panel.spacing.y = unit(0, "lines"),
           # axis.ticks.x = element_blank(),
           axis.ticks.y = element_blank(),
    #     panel.background=element_rect(fill="white", color="black"),
    #     panel.grid.major = element_line(color = 'grey', linetype = 'dotted'),
    #     panel.spacing = unit(0.2, "lines"),
    #     plot.title = element_text(size = 10),
    #     text=element_text(size=10)
         ) +
  coord_flip(ylim = c(0, 1)) +
  facet_wrap(~ facet_label, ncol = 2, dir = "v", labeller = label_wrap_gen(70)) + 
  labs(x = NULL, y = NULL)

ggsave(path=here::here("figures","PIPE_paper"), file="issue_position_all_responses_v.pdf", height=9, width=7, units = "in")
 

```

## Revealed Issue Importance (Conjoint)

For the analysis of revealed issue importance, we organize the data as follows:

-   One row per profile-pair observation.

-   One "agreement score" column for each issue. The agreement score is 0.5 if "set A" has the respondent's preferred policy on that issue; - 0.5 if set A has the opposing position; and 0 if the respondent said they had no opinion or was not asked their position. We use this scoring convention so that model coefficents are readily interpretable, as explained below.

-   One column for the DV, $y$, scored as 1 if respondent chose set A in the pair, 0 if the respondent chose set B, or NA if respondent skipped the question.

We then fit a linear model of $y$ on agreement scores for every policy and an intercept. The intercept captures the average tendency (if any) to pick the first of the two policy sets. The coefficients on the agreement scores capture the effect of switching a policy in set A switching from "the position the respondent disfavors" to "the position the respondent favors" on the probability that the respondent chooses that set, controlling for the other items in the set.

We had some concern that the randomization of policy selection for each respondent, which limits respondents to "set A" vs. "set B" matchups drawn from 10 policies rather than the full set of 39, might cause coverage problems with te conventional clustered-on-respondent standard errors. Our simulations below show that both conventional clustered standard errors and the block boostrap generate correct coverage rates. Out of an excess of caution, however, we will report the block-bootstrapped standard error as the primary measure of statistical uncertainty.[^7]

[^7]: Its also possible that by assigning "agreement scores" of 0 to cases of missing data (e.g., when a respondent did not state positions on the issues, but did choose a set), we may inflate the apparent number of observations, leading to parametric confidence intervals which are too small. The block bootstrap, which treats the observed distribution of the data as best-guess evidence of its sampling distribution, should be more robust.

### Code for Paper

```{r}
#| label: conjoint-long-df 
#| depends: issue-pref-long-df

D_conjoint <- D %>%
  pivot_longer(cols = c(Q13.2, Q13.4, Q13.6, Q13.8, Q13.10),
               names_to = "conjoint_Q",
               values_to = "y") %>%
  mutate(y = case_match(y, 
                       '1' ~ 1,
                       '2' ~ 0)
         )

# add a column for each policy. filling it in with 0 (neutral position) to start.  
D_conjoint[ , issues$code] <- 0 

# add a column with string for the issues in a matchup, and a string for the setA positions in the matchup
D_conjoint <- D_conjoint %>%
  mutate(
    matchup_issues = case_when(
      conjoint_Q == "Q13.2" ~ paste(c_i_code_1, c_i_code_2, c_i_code_3, sep = "."),
      conjoint_Q == "Q13.4" ~ paste(c_i_code_4, c_i_code_5, c_i_code_6, sep = "."),
      conjoint_Q == "Q13.6" ~ paste(c_i_code_7, c_i_code_8, c_i_code_9, sep = "."),
      conjoint_Q == "Q13.8" ~ paste(c_i_code_10, c_i_code_11, c_i_code_12, sep = "."), 
      conjoint_Q == "Q13.10" ~ paste(c_i_code_13, c_i_code_14, c_i_code_15, sep = ".")
    ),
    setA_positions = case_when(
      conjoint_Q == "Q13.2" ~ paste(c_i_position_A_1, c_i_position_A_2, c_i_position_A_3, sep = "."),
      conjoint_Q == "Q13.4" ~ paste(c_i_position_A_4, c_i_position_A_5, c_i_position_A_6, sep = "."),
      conjoint_Q == "Q13.6" ~ paste(c_i_position_A_7, c_i_position_A_8, c_i_position_A_9, sep = "."),
      conjoint_Q == "Q13.8" ~ paste(c_i_position_A_10, c_i_position_A_11, c_i_position_A_12, sep = "."),
      conjoint_Q == "Q13.10" ~ paste(c_i_position_A_13, c_i_position_A_14, c_i_position_A_15, sep = ".")
    )
  )

# for each row of D_conjoint (i.e. for each matchup of set A v set B)
  # for j in issue codes
    # lookup whether this respondent stated a position on the issue. if not, return 0
    # lookup whether issue is in the conjoint matchup. if not, return 0
    # if it is in the matchup, determine whether set A has the respondents position. if D_issues$y[D_issues$code == issue] for the respondent == 1, the respondent has the lib_position. If it's 0, the respondent has the con_position, and if it's 0.5, the respondent had no preference.
    # if setA has the respondent's position, code the issue as 0.5. if setA does not, code the issue as -0.5.    

# D_conjoint <- D_conjoint[!is.na(D_conjoint$matchup_issues), ] # drop rows from test data before conjoints were introduced to the survey

# loop to create issue scores for each respondent x matchup. NB: the str_replace() functions in this code are necessary to avoid matching problems created by use of parentheses in some of the position statements. The parens don't match as a string unless escaped, so I'm removing them from positions before matching.
for(i in 1:nrow(D_conjoint)) {
  match_issues <- stringr::str_split_1(D_conjoint$matchup_issues[i], "\\.") # issues in the matchup
  D_issues_i <- D_issues[D_issues$ResponseId == D_conjoint$ResponseId[i],] # subset issues data to this respondent. There should be 10 rows, unique issues in each row
  # look up whether respondent subscribes to "lib_position" on each issue in match_issue and update D_conjoint accordingly
  for(j in 1:length(match_issues)){
    if(!match_issues[j] %in% D_issues_i$code) {D_conjoint[i, match_issues[j]] <- 0} # this shouldn't happen but it did with some early test data
    else if(D_issues_i$y[D_issues_i$code == match_issues[j]] == 1 & # respondent has lib position
       str_detect(
         str_replace_all(D_conjoint$setA_positions[i], c("\\(" = "", "\\)" = "")), # setA has lib position
         str_replace_all(issues$lib_position[issues$code == match_issues[j]], c("\\(" = "", "\\)" = ""))
         )
       ) {D_conjoint[i, match_issues[j]] <- 0.5}
    else if(D_issues_i$y[D_issues_i$code == match_issues[j]] == 0 & # respondent has con position
       str_detect(
         str_replace_all(D_conjoint$setA_positions[i], c("\\(" = "", "\\)" = "")), # setA has lib position
         str_replace_all(issues$lib_position[issues$code == match_issues[j]], c("\\(" = "", "\\)" = ""))
         )
       ) {D_conjoint[i, match_issues[j]] <- - 0.5}
    else if(D_issues_i$y[D_issues_i$code == match_issues[j]] == 1 & # respondent has lib position
       str_detect(
         str_replace_all(D_conjoint$setA_positions[i], c("\\(" = "", "\\)" = "")), # setA has con position
         str_replace_all(issues$con_position[issues$code == match_issues[j]], c("\\(" = "", "\\)" = ""))
         )
       ) {D_conjoint[i, match_issues[j]] <- -0.5}
    else if(D_issues_i$y[D_issues_i$code == match_issues[j]] == 0 & # respondent has con position
       str_detect(
         str_replace_all(D_conjoint$setA_positions[i], c("\\(" = "", "\\)" = "")), # setA has con position
         str_replace_all(issues$con_position[issues$code == match_issues[j]], c("\\(" = "", "\\)" = ""))
         )
      ) {D_conjoint[i, match_issues[j]] <- 0.5}
    }
  }
  

# retain only the covariates needed for the analysis 
D_conjoint <- D_conjoint %>% dplyr::select(y:setA_positions, 
                                    ResponseId,  
                                    pid3.wlean,
                                    tenure,
                                    want.price,
                                    imp.top3.housing,
                                    imp.open.housing)


# D_conjoint %>%
#   rowwise() %>%
#   mutate(non_zero_count = sum(c_across(Housing_Build_MR_Infill:Taxes_Gas) != 0, na.rm = TRUE))

```

```{r}
#| label: fit_conjoint_models
#| depends: conjoint-long-df

# The one problem with boot::boot is that you have to feed it a function that returns a vector of statistics, rather than a dataframe, matrix, or list. This makes it tricky to fit multiple models to each bootstrapped replicate. A workaround is to return a vector of coefficients from each model, then reshape the output into a dataframe. The further tricky complication is that if a coefficient is NA in a given model, it's not reported in the "tidied" output, so converting the vector of coefficients into a df requires some care.

m <- ifelse(testing == TRUE, 10, M) # number of bootstrap replications

ResponseId <- unique(D_conjoint$ResponseId)

fun_conjoint_imp <- function(x, i){
  mydata <- left_join(data.frame(ResponseId = x[i]), D_conjoint, by = "ResponseId", relationship = "many-to-many")
  regressors <- names(D_conjoint %>% dplyr::select(Housing_Build_MR_Infill:Taxes_Gas))
  model_formula <- reformulate(termlabels = regressors,
                        response = "y",
                        intercept = TRUE)
  Full <- coefficients(lm(model_formula, data = mydata))
    names(Full) <- paste0(names(Full), ".Full")
  Renters <- coefficients(lm(model_formula, data = mydata %>% filter(tenure == "Renter")))
    names(Renters) <- paste0(names(Renters), ".Renters") 
  Owners <- coefficients(lm(model_formula, data = mydata %>% filter(tenure == "Owner")))
    names(Owners) <- paste0(names(Owners), ".Owners")
  Wants_Lower <- coefficients(lm(model_formula, data = mydata %>% filter(want.price == "Lower")))
    names(Wants_Lower) <- paste0(names(Wants_Lower), ".Wants_Lower")
  Wants_Not_Lower <- coefficients(lm(model_formula, data = mydata %>% filter(want.price %in% c("Higher", "Same"))))
    names(Wants_Not_Lower) <- paste0(names(Wants_Not_Lower), ".Wants_Not_Lower")
  Democrats <- coefficients(lm(model_formula, data = mydata %>% filter(pid3.wlean == "dem")))
    names(Democrats) <- paste0(names(Democrats), ".Democrats")
  Republicans <- coefficients(lm(model_formula, data = mydata %>% filter(pid3.wlean == "rep")))  
    names(Republicans) <- paste0(names(Republicans), ".Republicans")
  Hous_Issue_Public_Top3 <- coefficients(lm(model_formula, data = mydata %>% filter(imp.top3.housing == 1)))
    names(Hous_Issue_Public_Top3) <- paste0(names(Hous_Issue_Public_Top3), ".Hous_Issue_Public_Top3")
  Not_Hous_Issue_Public_Top3 <- coefficients(lm(model_formula, data = mydata %>% filter(imp.top3.housing == 0)))
    names(Not_Hous_Issue_Public_Top3) <- paste0(names(Not_Hous_Issue_Public_Top3), ".Not_Hous_Issue_Public_Top3")
  Hous_Issue_Public_Open <- coefficients(lm(model_formula, data = mydata %>% filter(imp.open.housing == TRUE)))
    names(Hous_Issue_Public_Open) <- paste0(names(Hous_Issue_Public_Open), ".Hous_Issue_Public_Open")
  Not_Hous_Issue_Public_Open <- coefficients(lm(model_formula, data = mydata %>% filter(imp.open.housing != TRUE)))
    names(Not_Hous_Issue_Public_Open) <- paste0(names(Not_Hous_Issue_Public_Open), ".Not_Hous_Issue_Public_Open")  
  
    return(c(Full, Renters, Owners, Wants_Lower, Wants_Not_Lower, Democrats, Republicans, Hous_Issue_Public_Top3, Not_Hous_Issue_Public_Top3, Hous_Issue_Public_Open, Not_Hous_Issue_Public_Open))
}

conjoint_models <- boot(ResponseId, fun_conjoint_imp, m)
tidied_conjoint_models <- tidy(conjoint_models, conf.int = TRUE, conf.method = "perc") # This is what we'll use in the paper (confidence intervals generated with the percentile method), but it breaks on the test data due to missingness in the small sample.
tidied_conjoint_models <- tidied_conjoint_models %>% tidyr::separate_wider_delim("term", ".", names = c("term", "model"))

saveRDS(conjoint_models, here::here("data_transformed", "PIPE_paper", "conjoint_fits.RData"))
saveRDS(tidied_conjoint_models, here::here("data_transformed", "PIPE_paper", "conjoint_fits_tidied.RData"))

```

```{r}
#| label: get_CIs_group_diffs
#| depends: fit_conjoint_models

# This code chunk was added post-PAP, to generate confidence intervals on group differences.

# label bootstrap replicates for convenience
boot_D <- as.data.frame(conjoint_models$t)
# names(boot_D) <- tidied_conjoint_models$term
names(boot_D) <- paste0(tidied_conjoint_models$term, ".", tidied_conjoint_models$model)

# Loop over groups to obtain point estimate and CI on group differences. Use same shorthand as in fun_conjoint_imp().
conjoint_group_diff = list() # empty list to hold results

groups <- list(
  tenure = c("Renters", "Owners"),
  want = c("Wants_Lower", "Wants_Not_Lower"),
  pid = c("Democrats", "Republicans"),
  issue.pub.top3 = c("Hous_Issue_Public_Top3", "Not_Hous_Issue_Public_Top3"),
  issue.pub.open = c("Hous_Issue_Public_Open", "Not_Hous_Issue_Public_Open")
)

for(i in 1:length(groups)){
  temp1 <- boot_D[, str_detect(names(boot_D), paste0("\\.", groups[[i]][1]))]
  temp2 <- boot_D[, str_detect(names(boot_D), paste0("\\.", groups[[i]][2]))]
  
  diff = temp1 - temp2
  names(diff) = str_extract(names(diff), "^[^.]+")
  conf.low = map_vec(diff, ~ quantile(.x, probs = 0.05)) 
  conf.high = map_vec(diff, ~ quantile(.x, probs = 0.95)) 
  # estimate is difference in point estimates from model fit to the unbootstrapped dataset
  estimate = conjoint_models$t0[str_detect(names(conjoint_models$t0), paste0("\\.", groups[[i]][1]))] - 
    conjoint_models$t0[str_detect(names(conjoint_models$t0), paste0("\\.", groups[[i]][2]))]
  
  conjoint_group_diff[[i]] <- tibble(
    term = names(diff),
    estimate = estimate,
    conf.low = conf.low,
    conf.high = conf.high
  )
  
  names(conjoint_group_diff)[i] <- paste0(groups[[i]][1], ".minus.", groups[[i]][2])
}

saveRDS(conjoint_group_diff, here::here("data_transformed", "PIPE_paper", "conjoint_group_diff.RData"))
 
```

```{r}
#| label: plot_revealed_importance
#| depends: fit_conjoint_models

# This plots Fig. 4.5.

# load fitted model results if not compiling from scratch
# tidied_conjoint_models <- readRDS(file=here::here("data_transformed",
#                                     "PIPE_paper",
#                                     "conjoint_fits_tidied.RData"))

shape_mapping <- c("Social" = 0, "Economic" = 0,
                   "Demand Fence" = 1,
                   "Supply Side - MR" = 2, "Supply Side - BMR" = 3,
                   "Supply Side - Untargeted" = 4, "Price Control" = 5,
                   "Demand Subsidy" = 6)

tidied_conjoint_models %>%
  filter(model=="Full" & term != "(Intercept)") %>% 
  left_join(dplyr::select(issues, c(plot_shorthand, code, policy_type)), by=c("term"="code")) %>%  
  mutate(housing = ifelse(policy_type %in% c("Social", "Economic"), 0, 1)) %>%
  ggplot(aes(y=reorder(plot_shorthand, statistic), x=statistic, 
             color=factor(policy_type), 
             alpha=factor(housing), 
             shape=factor(policy_type), 
             size=factor(housing))) + 
  geom_errorbar(aes(xmin=conf.low, xmax=conf.high)) +
  scale_color_manual("Policy Type", 
                   values = c("Supply Side - MR" = "#66A61E", 
                              "Supply Side - BMR" = "#E7298A", 
                              "Supply Side - Untargeted" = "#E6AB02", 
                              "Price Control" = "#7570B3", 
                              "Demand Subsidy" = "#A6761D", 
                              "Demand Fence" = "#1B9E77", 
                              "Social" = "#4393C3", 
                              "Economic" = "#B2ABD2")) +
  geom_point(size=3) +
  scale_shape_manual("Policy Type",
                     values=shape_mapping) +
  scale_x_continuous("") + 
  scale_y_discrete("") + 
  labs(title="Revealed Policy Importance",
       alpha="none",
        x = "",  
       size="none")+
  theme_bw() +
  scale_alpha_discrete(guide="none",range=c(.4,1))+
  scale_size_discrete(guide="none",range=c(.5,1))+
  theme(strip.background = element_blank(),
        legend.position = "bottom",
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        legend.title=element_text(size=0.6*main_text),
        legend.text=element_text(size=0.6*main_text),
        axis.text= element_text(size = axis_text),
        plot.title = element_text(size=main_text, hjust = 0.5), 
        legend.margin = margin(0, 2, 0, 0),
        plot.margin = ggplot2::margin(0.5,1,0.5,0,"cm")) + 
  guides(
    color = guide_legend(ncol = 3), # Spread legend over 3 columns 
  )

ggsave(path=here::here("figures", "PIPE_paper"), 
       file="conjoint_revealed_issue_importance.pdf", height=16, width=16)
```

```{r}
#| label: plot_revealed_importance_and_support

# This code chunk generates figures for SI Appendix C. 

tidied_conjoint_models %>%
  filter(model=="Full" & term != "(Intercept)") %>% 
  rename_with(~ paste0(., "_ri"), everything()) %>% # rename to distinguish from positions tibble; ri for revealed importance 
  inner_join(
    positions_full_3 %>% rename_with(~ paste0(., "_supp"), everything()), 
    by = join_by(term_ri == policy_supp)
    ) %>%  
  inner_join( # add plot_shorthand and policy_type for color coding from issues tibble 
    issues %>% dplyr::select(code, plot_shorthand, policy_type),
    by = join_by(term_ri == code)
  ) %>%
  mutate(housing = ifelse(policy_type %in% c("Social", "Economic"), 0, 1)) %>%
  ggplot(aes(x=estimate_supp, 
             y=statistic_ri, 
             color=policy_type, 
             label=plot_shorthand,
             alpha=factor(housing))) +   
  coord_cartesian(xlim = c(0, 1),
                ylim = c(0, 0.6)) +
  geom_errorbar(aes(ymin=conf.low_ri, ymax=conf.high_ri), width=.005) +
  geom_errorbar(aes(xmin=conf.low_supp, xmax=conf.high_supp), width=.005) +
  geom_text_repel(force=25, size = 0.36*geom_text_text)+
  geom_point(size = 0.36*geom_dot_text) +
  scale_x_continuous(labels=scales::percent, 
                     breaks = round(seq(0, 1, by = 0.2), digits = 2),
                     "Support (vs. opposite policy)") + 
  scale_y_continuous("Revealed Policy Importance") + 
  scale_color_manual("Policy Type", 
                   values = c("Supply Side - MR" = "#66A61E", 
                              "Supply Side - BMR" = "#E7298A", 
                              "Supply Side - Untargeted" = "#E6AB02", 
                              "Price Control" = "#7570B3", 
                              "Demand Subsidy" = "#A6761D", 
                              "Demand Fence" = "#1B9E77", 
                              "Social" = "#4393C3", 
                              "Economic" = "#B2ABD2")) +
  labs(title="") +
  theme_bw() +
  theme(strip.background = element_blank(),
        legend.position = "bottom",
        #strip.text.y.left = element_text(angle = 90),
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        axis.title.x = element_text(size = axis_text),
        axis.title.y = element_text(size = axis_text), #legend.title=element_text(size=8),
        legend.title=element_text(size=graph_text),
        legend.text=element_text(size=graph_text),
        axis.text= element_text(size = axis_text),
        #axis.text.x= element_text(size = axis_text),
        #text=element_text(size=graph_text),
        plot.title = element_text(size=main_text, hjust = 0.5)) +
  guides(alpha = "none") # remove factor(housing) from legend

ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_ri.pdf", height=10, width=16) # This is SI Fig. C.4. 


# Function for subsetted group-diff plots.  
ri_supp_diff_plot_fun <- function(ri_diff, support_diff, plot_title) {
  ri_diff %>%
    rename_with(~ paste0(., "_ri"), everything()) %>% # rename to distinguish columns from those in positions tibble
    inner_join(
      support_diff %>% rename_with(~ paste0(., "_supp"), everything()),
      by = join_by(term_ri == policy_supp)
      ) %>%  
    inner_join( # add policy_type from issues tibble, for color coding
      issues %>% dplyr::select(code, plot_shorthand, policy_type),
      by = join_by(term_ri == code)
    ) %>%
    mutate(housing = ifelse(policy_type %in% c("Social", "Economic"), 0, 1)) %>%
    ggplot(aes(x=estimate_supp, 
             y=estimate_ri, 
             color=policy_type, 
             label=plot_shorthand,
             alpha=factor(housing))) +   
    coord_cartesian(xlim = c(-0.3, 0.5), ylim = c(-0.25, 0.25)) +
    geom_errorbar(aes(ymin=conf.low_ri, ymax=conf.high_ri), width=.005) +
    geom_errorbar(aes(xmin=conf.low_supp, xmax=conf.high_supp), width=.005) +
    geom_text_repel(force=25, size = 0.36*geom_text_text)+
    geom_point(size = 0.36*geom_dot_text) +
    geom_vline(xintercept = 0, color = "red", alpha = 0.1, linewidth = 1) +   
    geom_hline(yintercept = 0, color = "red", alpha = 0.1, linewidth = 1) +
    scale_x_continuous(labels=scales::percent, "Support (vs. opposite policy)") + 
    scale_y_continuous("Revealed Policy Importance") + 
    scale_color_manual("Policy Type", 
                     values = c("Supply Side - MR" = "#66A61E", 
                                "Supply Side - BMR" = "#E7298A", 
                                "Supply Side - Untargeted" = "#E6AB02", 
                                "Price Control" = "#7570B3", 
                                "Demand Subsidy" = "#A6761D", 
                                "Demand Fence" = "#1B9E77", 
                                "Social" = "#4393C3", 
                                "Economic" = "#B2ABD2")) +
    labs(title=paste0(plot_title)) +  
    theme_bw() +
    theme(strip.background = element_blank(),
          legend.position = "bottom",
          #strip.text.y.left = element_text(angle = 90),
          panel.spacing = unit(0.0, "lines"),
          strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
          axis.title.x = element_text(size = axis_text),
          axis.title.y = element_text(size = axis_text), #legend.title=element_text(size=8),
          legend.title=element_text(size=graph_text),
          legend.text=element_text(size=graph_text),
          axis.text= element_text(size = axis_text),
          #axis.text.x= element_text(size = axis_text),
          #text=element_text(size=graph_text),
          plot.title = element_text(size=main_text, hjust = 0.5)) +
     guides(alpha = "none") # remove factor(housing) from legend
}

# tenure plot. SI Fig. C.5. 
ri_supp_diff_plot_fun(ri_diff = conjoint_group_diff[["Renters.minus.Owners"]],
                  support_diff = positions_tenure_diff_3,
                  plot_title = "Renters Minus Homeowners")
ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_ri_tenure.pdf", height=10, width=16)

# want-price plot. SI Fig. C.6. 
ri_supp_diff_plot_fun(ri_diff = conjoint_group_diff[["Wants_Lower.minus.Wants_Not_Lower"]],
                  support_diff = positions_want_diff_3,
                  plot_title = "'Wants Lower Prices' Minus 'Doesn't Want Lower Prices'")
ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_ri_want.pdf", height=10, width=16)

# pid plot. SI Fig. C.7. 
ri_supp_diff_plot_fun(ri_diff = conjoint_group_diff[["Democrats.minus.Republicans"]],
                  support_diff = positions_pid_diff_3,
                  plot_title = "Democrats Minus Republicans")
ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_ri_pid.pdf", height=10, width=16)

# top3 plot. SI Fig. C.8. 
ri_supp_diff_plot_fun(ri_diff = conjoint_group_diff[["Hous_Issue_Public_Top3.minus.Not_Hous_Issue_Public_Top3"]],
                  support_diff = positions_issue_pub_open_diff_3,
                  plot_title = "'Housing Issue Public (Top 3)' Minus Others")
ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_ri_top3.pdf", height=10, width=16)

# MIP plot. SI Fig. C.9. 
ri_supp_diff_plot_fun(ri_diff = conjoint_group_diff[["Hous_Issue_Public_Open.minus.Not_Hous_Issue_Public_Open"]],
                  support_diff = positions_tenure_diff_3,
                  plot_title = "'Housing Issue Public (Free Text)' Minus Others")
ggsave(path=here::here("figures", "PIPE_paper"), 
       file="fig_policy_support_ri_MIP.pdf", height=10, width=16)

```

```{r}
#| label: importance_by_policy_preference

## This generates results for Fig. 4.6. It is an off-plan chunk that separately calculated revealed issue importance by people who are on each side of the policy dispute.

set.seed((seed1))

# Step 1: create factor variable for whether respondent subscribes to lib_position or con_position on each issue. This will be used for subsetting. Respondents who subscribe to neither position are dropped. 
ResponseId_positions <- D_issues %>%
  dplyr::select(ResponseId, y, code) %>%
  pivot_wider(names_from = code,
              values_from = y) %>%
  mutate(across(Land_Bank:Taxes_Gas, ~ case_when(. == 1 ~ "lib",
                                                 . == 0 ~ "con",
                                                 TRUE ~ NA_character_)),
         across(Land_Bank:Taxes_Gas, ~ factor(., levels = c("con", "lib")))
         ) 

# prepend "group" to issue codes
names(ResponseId_positions)[2:length(names(ResponseId_positions))] <- 
  paste0("group.", names(ResponseId_positions)[2:length(names(ResponseId_positions))])

# join to D_conjoint
D_conjoint <- D_conjoint %>%
  left_join(ResponseId_positions)


ResponseId <- unique(D_conjoint$ResponseId)

groups <- names(D_conjoint)[str_detect(names(D_conjoint), "^group.")]
regressors <- names(D_conjoint %>% dplyr::select(Housing_Build_MR_Infill:Taxes_Gas))
  
fun_conjoint_imp_interact <- function(x, i, groups, regressors){
  
  mydata <- left_join(data.frame(ResponseId = x[i]), D_conjoint, by = "ResponseId", relationship = "many-to-many")
  
  fit_fun <- function(group){ 
    model_formula <- reformulate(
      termlabels = c(regressors, paste0(group, ":", regressors)),
      response = "y",
      intercept = TRUE)
   coefficients(lm(model_formula, data = mydata))
  }
    
  results <- map(groups, fit_fun) %>% unlist(.)
  
return(results[str_detect(names(results), ":")]) # grab the interaction terms
}

conjoint_by_position_diff <- boot(ResponseId, fun_conjoint_imp_interact, M, groups = groups, regressors = regressors)

conjoint_by_position_diff_tidied <- conjoint_by_position_diff %>% tidy(., conf.int = TRUE, conf.method = "perc")

saveRDS(conjoint_by_position_diff_tidied, here::here("data_transformed", "PIPE_paper", "conjoint_by_position_diff_tidied.RData"))

```

```{r}
#| label: plot_importance_by_policy_preference

# This plots Fig. 4.6 (off-plan).  

D_plot <- conjoint_by_position_diff_tidied %>%
  mutate( # this creates logical indicator for whether a term corresponds to sample split on the policy whose differential importance to each group "term" estimates
    target_contrast = str_remove_all(term, "group\\.|lib|con"),
    target_contrast = vapply(strsplit(target_contrast, ":", fixed = TRUE), anyDuplicated, 1L) > 0L,
    code = str_extract(term, "^[^:]+")
    ) %>%
  filter(target_contrast == TRUE) %>%
  left_join(dplyr::select(issues, c(plot_shorthand, code, policy_type))) %>%
  mutate(housing = ifelse(policy_type %in% c("Social", "Economic"), 0, 1))
    
shape_mapping <- c("Social" = 0, "Economic" = 0,
                   "Demand Fence" = 1,
                   "Supply Side - MR" = 2, "Supply Side - BMR" = 3,
                   "Supply Side - Untargeted" = 4, "Price Control" = 5,
                   "Demand Subsidy" = 6)

##Sort by group-difference in revealed importance and plot

D_plot %>%  
  left_join(dplyr::select(issues, c(code, plot_shorthand))) %>%  
  ggplot(aes(y=reorder(plot_shorthand, statistic), x=statistic, 
             color=factor(policy_type), 
             alpha=factor(housing), shape=factor(policy_type), size=factor(housing))) + 
  geom_errorbar(aes(xmin=conf.low, xmax=conf.high)) +
  scale_color_manual("Policy Type", 
                   values = c("Supply Side - MR" = "#66A61E", 
                              "Supply Side - BMR" = "#E7298A", 
                              "Supply Side - Untargeted" = "#E6AB02", 
                              "Price Control" = "#7570B3", 
                              "Demand Subsidy" = "#A6761D", 
                              "Demand Fence" = "#1B9E77", 
                              "Social" = "#4393C3", 
                              "Economic" = "#B2ABD2")) +
  geom_point(size=3) +
  geom_vline(xintercept = 0, color = "red", alpha = 0.2, size = 1) +
  scale_shape_manual("Policy Type",
                     values=shape_mapping) +
  scale_x_continuous(breaks = round(seq(-0.6, 0.6, by = 0.2), digits = 2)) + 
  scale_y_discrete("") + 
  labs(title="Revealed Importance: Supporters Minus Opponents",
       alpha="none",
        x = "",  
       size="none")+
  theme_bw() +
  scale_alpha_discrete(guide="none",range=c(.4,1))+
  scale_size_discrete(guide="none",range=c(.5,1))+
  theme(strip.background = element_blank(),
        legend.position = "bottom",
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        legend.title=element_text(size=0.6*main_text),
        legend.text=element_text(size=0.6*main_text),
        axis.text= element_text(size = axis_text),
        plot.title = element_text(size=main_text, hjust = 0.5),
        legend.margin = margin(0, 2, 0, 0)) + 
  guides(
    color = guide_legend(ncol = 3), # Spread legend over 3 columns 
  )

 ggsave(path=here::here("figures", "PIPE_paper"), 
       file="conjoint_importance_supporters_minus_opponents.pdf", height=16, width=16)


```

## Other Exploratory Results

*Though it's not central to this study, we explored the feasibility of constructing issue-public classifications using natural language processing of a free-text question asking respondents whether any issue in state politics is particularly important to them. The PAP called for this investigation, but did not commit to a method for doing it. The method we settled on using the process described here was subsequently added to the import-and-recode section of this document as `free_text_issue_public_classify`.*

*Our off-plan process went as follows:*

1.  *We created word cloud and keyness plots to get a feel for the issues that respondents named.*

2.  *We wrote a codebook for hand-coding of responses into fifteen categories: the twelve categories of `imp.top3` (the closed-form question on which respondents may designated up to 3 of twelve listed issues as most important to them), plus "other," "none," and "nonresponsive."*

3.  *We wrote a bag-of-words classifiers to automate classification of the respondents into these categories.*

4.  *After settling on the codebook, the three co-PIs jointly hand-coded the first 200 non-missing responses, inspected one another's coding, and resolved disagreements. We also agreed to treat our hand-coding of these 200 responses as training data for Chat GPT.*

5.  *We tried feeding the codebook, hand-coding of the first 200 responses, and full dataset to Chat GPT for classification of the other responses. GPT proposed a richer bag-of-words classifier but hung up trying to do the coding.*

6.  *We wrote a second bag-of-words classifer after considering GPT's proposal. The second classifier incorporates many of GPT's suggestions, while maintaining our original approach of matching word roots rather than complete words (e.g., "immigra" rather than "immigration," "immigrating," "immigrants," etc.).*

7.  *We drew 200 responses at random from the remaining non-missing responses, and each independently hand-coded these responses to get a measure of inter-coder reliability.*

8.  *We applied both bag of words classifiers.*

9.  *We calculated intercoder reliability between each pair of coders / classifiers.*

### A Free-Text Measure of Issue Publics

```{r free text issue public all}

# This generates a wordcloud plot that we did not include in the paper, because we thought the faceted plots in SI Appendix B were easier to interpret. 

#Chris already set up seed. Use the same one to not mess other code up. Use seed1
set.seed(seed1)

##Ingest the open-ended issue public question
dfm_responses <- corpus(D$imp.open.text) %>% dfm(remove=stopwords("english"), remove_punct=T) 
##word must appear 5 times. Stantcheva does this.
textplot_wordcloud(dfm_responses, min_count=5)

pdf(file=here::here("figures","PIPE_paper","issue_priority_wordcloud.pdf"), height=10, width=16)
##word must appear 5 times. Stantcheva does this.
textplot_wordcloud(dfm_responses, min_count=5)
dev.off()

ggsave(path=here::here("figures","PIPE_paper"), file="issue_priority_wordcloud.pdf", height=10, width=16)

```

```{r free text issue public keyness plots}

# This generates a keyness plots that we did not include in the paper, because we thought the faceted plots in SI Appendix B were easier to interpret. 

##Chris already set up seed. Use the same one to not mess other code up. Use seed1
set.seed(seed1)

##Ingest the open-ended issue public question
key <- corpus(D$imp.open.text) 

###create variables 
key$tenure <- ifelse(D$ownhome==1, "Homeowner","Renter")

key$high_low <- ifelse(D$want.price=="Lower","Lower","Same or Higher")

##Keyness plots can only handle 2 variables. We make Democrats our target and GOP/Ind the reference. 
key$party <- ifelse(D$pid3.wlean=="dem","Democrat","GOP or Ind.")

###There are NAs in the data now, we have to drop them.

key <- key %>% corpus_subset(!is.na(tenure) & !is.na(high_low) & !is.na(party))


###Homeowners versus renters 
key %>%
  tokens(remove_punct = T) %>% 
  tokens_remove(stopwords("english")) %>% 
  tokens_group(group=tenure) %>% 
  dfm() %>% textstat_keyness(., target = "Homeowner") %>% textplot_keyness()

ggsave(path=here::here("figures","PIPE_paper"), file="issue_priority_keyness_tenure.pdf", height=10, width=16)


###Those who Want the prices Lower vs those who want it same or higher
key %>%
  tokens(remove_punct = T) %>% 
  tokens_remove(stopwords("english")) %>% 
  tokens_group(group=high_low) %>% 
  dfm() %>% textstat_keyness(., target = "Lower") %>% textplot_keyness()

ggsave(path=here::here("figures","PIPE_paper"), file="issue_priority_keyness_price_desire.pdf", height=10, width=16)

##Party ID
key %>%
  tokens(remove_punct = T) %>% 
  tokens_remove(stopwords("english")) %>% 
  tokens_group(group=party) %>% 
  dfm() %>% textstat_keyness(., target = "Democrat") %>% textplot_keyness()

ggsave(path=here::here("figures","PIPE_paper"), file="issue_priority_keyness_party.pdf", height=10, width=16)


```

The next code chunks describe the analytical path by which we arrived at a classifier for responses to the free-text most-important-issue in state politics question.

```{r}
#| label: free_text_issue_public_classify_bag_1  


# Classify respondents by their MIP free-text answer, using bag-of-words approach.  

# Make tibble of categories and keywords for each. The non-automated approach. Categories here correspond to the closed-form issue public question, to which respondents are encoded as imp.top3.x, where x is the issue
issue_pub_categories <- tibble::tribble(
  ~ cat, ~ strings,
  "cost.housing.bag1", "hous|rent|homes|property tax",  
  "homeless.bag1", "homeless|unhoused",
  "inflation.bag1", "cost|inflation|price",
  "immigration.bag1", "immigra|border|migrant",
  "crime.bag1", "crime|criminal|violent|violence",
  "abortion.bag1", "abortion|reproductive",
  "jobs.bag1", "job|wage",
  "education.bag1", "educat|school|college",
  "environment.bag1", "enviro|climate|nature|warming",
  "taxes.bag1", "tax",
  "healthcare.bag1", "health|prescription drug",
  "racism.bag1", "race|racial|discrim|black|woke"
)

# function to create imp.open indicators
create_imp.open.i <- function(data, categories) {
  for (i in 1:nrow(categories)) {
    category <- categories$cat[i]
    indicator <- str_detect(
      string = str_to_lower(data$imp.open.text), 
      pattern = categories$strings[i])
    data[[paste0("imp.open.", category)]] <- indicator
  }
  return(data)
}

# make the indicators
D <- create_imp.open.i(data = D, categories = issue_pub_categories)

# create an indicator for respondents who list either housing or homelessness as their top issue
D$imp.open.housing.or.homeless.bag1 <- D$imp.open.cost.housing.bag1 | D$imp.open.homeless.bag1

D %>% summarize(across(
  imp.open.cost.housing.bag1:imp.open.housing.or.homeless.bag1, 
  \(x) round(mean(x, na.rm = TRUE), digits = 2)))


```

```{r free_text_issue_public_exports}

# Download first 200 responses (not NA) and code by hand. Use these as training data.

# From balance of the data, randomly select 200 responses for independent coding by co-PIs to generate measures of intercoder reliability (amongst themselves, and with GPT)

set.seed(seed1)

D %>%  # this is the first 200, used to calibrate codebook, human-coder responses, and train GPT
  dplyr::select(ResponseId, imp.open.text) %>% 
  filter(!is.na(imp.open.text)) %>%
  slice_head(n = 200) %>%
  write_csv(here::here("data_transformed","PIPE_paper","MII_text_first_200.csv"))

D %>% # this is a random set for intercoder reliability checks
  dplyr::select(ResponseId, imp.open.text) %>% 
  filter(!is.na(imp.open.text)) %>%
  slice(201:n()) %>%
  slice_sample(n = 200, replace = FALSE) %>%
  write_csv(here::here("data_transformed","PIPE_paper","MII_text_random_200.csv"))



```

*Chat GPT proposed the following bag of words classifier, based on our codebook:*

*categories = { 1: \['housing', 'home', 'rent', 'affordable', 'mortgage', 'property tax', 'insurance', 'shelter'\], 2: \['abortion'\], 3: \['jobs', 'employment', 'unemployment', 'work', 'working conditions', 'wages'\], 4: \['inflation', 'cost of goods', 'rising cost', 'prices'\], 5: \['crime', 'social disorder', 'illegal drugs', 'violence', 'gun control'\], 6: \['education', 'access to education', 'quality of education', 'cost of education', 'college debt', 'woke'\], 7: \['environment', 'climate change', 'global warming', 'endangered species', 'pollution'\], 8: \['tax', 'taxes', 'reducing taxes', 'increasing taxes'\], 9: \['health care', 'quality of care', 'cost of health care', 'medical'\], 10: \['immigration', 'border wall', 'visas', 'sanctuary cities'\], 11: \['racism', 'racial justice', 'discrimination', 'reparations', 'race relations', 'bias'\], 12: \['homeless', 'camping on streets', 'panhandling', 'sleeping on park benches', 'urinating in public'\], 13: \['economy', 'political issue'\], 14: \['no political issue', 'not care'\], 15: \['nonresponsive', 'gibberish', 'what I ate for breakfast'\] }*

```{r}
#| label: free_text_issue_public_classify_bag_2  
#| depends: free_text_issue_public_classify_bag_1

# This is our revised classifier, adding some of GPTs suggestions, but not words that we expect to have different meetings (e.g., "work" could be used to decribe something that "isn't working," rather than to express concern about jobs.) 

issue_pub_categories_2 <- tibble::tribble(
  ~ cat, ~ strings,
  "cost.housing.bag2", "hous|rent|homes|property tax|mortgage|property insurance|homeowner insurance|homeowners insurance|home insurance|homeowner's insurance",  
  "homeless.bag2", "homeless|unhoused|panhandl",
  "inflation.bag2", "inflation|cost of|rising cost|rising price|prices",
  "immigration.bag2", "immigra|border|migrant|sanctuary",
  "crime.bag2", "crime|criminal|violent|violence|illegal drug|gun control|disorder",
  "abortion.bag2", "abortion|reproductive",
  "jobs.bag2", "job|wage|employment|unemployment",
  "education.bag2", "educat|school|college|woke",
  "environment.bag2", "enviro|climate|nature|warming|pollut|endangered species|",
  "taxes.bag2", "tax",
  "healthcare.bag2", "health|prescription drug|medical|medicine",
  "racism.bag2", "race|racial|discrim|black|woke|reparation|bias"
)

# make the indicators
D <- create_imp.open.i(data = D, categories = issue_pub_categories_2)

# create an indicator for respondents who list either housing or homelessness as their top issue
D$imp.open.housing.or.homeless.bag2 <- D$imp.open.cost.housing.bag2 | D$imp.open.homeless.bag2

D %>% summarize(across(imp.open.cost.housing.bag2:imp.open.housing.or.homeless.bag2, \(x) round(mean(x, na.rm = TRUE), digits = 2)))


```

```{r}
#| label: eval_free_text_classifiers
#| depends: free_text_issue_public_classify_bag_1, free_text_issue_public_classify_bag_1

# This code chunk evaluates consistency of classifiers with hand coders, and hand coders with one another.

# import the three PIs' independent hand-coding of the random subset of 200 response
SO.imp.open <- read_excel(here::here("data_raw","MII_hand_coding","MII_text_random_200_SO.xlsx"))
CN.imp.open <- read_excel(here::here("data_raw","MII_hand_coding","MII_text_random_200_CN.xlsx"))
CE.imp.open <- read_excel(here::here("data_raw","MII_hand_coding","MII_text_random_200_CE.xlsx"))

SO.imp.open$coder <- "SO"
CN.imp.open$coder <- "CN"
CE.imp.open$coder <- "CE"

# put all PI codings together
PI.imp.open <- bind_rows(SO.imp.open, CN.imp.open, CE.imp.open)

# pivotwider and coalesce to one column per category
PI.imp.open <- PI.imp.open %>%
  pivot_wider(names_from = imp.open.class.1,
              values_from = imp.open.class.1,
              values_fn = length,  # Count occurrences of each value
              values_fill = 0,     # Fill missing values with 0
              names_prefix = "1.") %>% # Prefix for indicator variables
  pivot_wider(names_from = imp.open.class.2,
              values_from = imp.open.class.2,
              values_fn = length,  # Count occurrences of each value
              values_fill = 0,     # Fill missing values with 0
              names_prefix = "2.") %>%
  pivot_wider(names_from = imp.open.class.3,
              values_from = imp.open.class.3,
              values_fn = length,  # Count occurrences of each value
              values_fill = 0,     # Fill missing values with 0
              names_prefix = "3.") 
# coalesce 
PI.imp.open <- PI.imp.open %>% 
  mutate(
    immigration = rowSums(dplyr::select(., contains("immigration"))),
    crime = rowSums(dplyr::select(., contains("crime"))),
    taxes = rowSums(dplyr::select(., contains("taxes"))),
    racism = rowSums(dplyr::select(., contains("racism"))),
    other = rowSums(dplyr::select(., matches("other|none|nonresponsive"))),
    abortion = rowSums(dplyr::select(., contains("abortion"))),
    inflation = rowSums(dplyr::select(., contains("inflation"))),
    environment = rowSums(dplyr::select(., contains("environment"))),
    homelessness = rowSums(dplyr::select(., contains("homelessness"))),
    health_care = rowSums(dplyr::select(., contains("health_care"))),
    cost_of_housing = rowSums(dplyr::select(., contains("cost_of_housing"))),
    availability_of_jobs = rowSums(dplyr::select(., contains("availability_of_jobs"))),
    education = rowSums(dplyr::select(., contains("education")))
  ) %>%
  dplyr::select(ResponseId, coder, immigration:education) 

# create analogously labels subsets of D
bag1.imp.open <- D %>%
  dplyr::select(ResponseId, matches("bag1")) %>%
  mutate(coder = "bag1",
         across(where(is.logical), as.numeric)
         ) %>%
  rename(immigration = imp.open.immigration.bag1,
         crime = imp.open.crime.bag1,
         taxes = imp.open.taxes.bag1,
         racism = imp.open.racism.bag1,
         abortion = imp.open.abortion.bag1,
         inflation = imp.open.inflation.bag1,
         environment = imp.open.environment.bag1,
         homelessness = imp.open.homeless.bag1,
         health_care = imp.open.healthcare.bag1,
         cost_of_housing = imp.open.cost.housing.bag1, 
         availability_of_jobs = imp.open.jobs.bag1,
         education = imp.open.education.bag1
           ) %>%
  dplyr::select(-c(imp.open.housing.or.homeless.bag1)) %>% 
  filter(ResponseId %in% SO.imp.open$ResponseId) %>% # restrict to rows in the hand-coded data
  arrange(match(ResponseId, SO.imp.open$ResponseId)) # order rows per hand-coded data

bag2.imp.open <- D %>%
  dplyr::select(ResponseId, matches("bag2")) %>%
  mutate(coder = "bag2",
         across(where(is.logical), as.numeric)
         ) %>%
  rename(immigration = imp.open.immigration.bag2,
         crime = imp.open.crime.bag2,
         taxes = imp.open.taxes.bag2,
         racism = imp.open.racism.bag2,
         abortion = imp.open.abortion.bag2,
         inflation = imp.open.inflation.bag2,
         environment = imp.open.environment.bag2,
         homelessness = imp.open.homeless.bag2,
         health_care = imp.open.healthcare.bag2,
         cost_of_housing = imp.open.cost.housing.bag2, 
         availability_of_jobs = imp.open.jobs.bag2,
         education = imp.open.education.bag2
           ) %>%
  dplyr::select(-c(imp.open.housing.or.homeless.bag2)) %>% 
  filter(ResponseId %in% SO.imp.open$ResponseId) %>% # restrict to rows in the hand-coded data
  arrange(match(ResponseId, SO.imp.open$ResponseId)) # order rows per hand-coded data

# combine all coders into single tibble
coded.imp.open <- PI.imp.open %>% 
  bind_rows(bag1.imp.open) %>% 
  bind_rows(bag2.imp.open) %>%
  nest(data = -coder)

# irr::kappa(2) needs both cols in same tibble, so:

rename_cols <- function(data, coder) {
  data %>%
    rename_with(~ if_else(. == "ResponseId", ., paste0(., '.', coder)))
}

coded.imp.open <- coded.imp.open %>% 
  mutate(data = map2(data, coder, ~ rename_cols(.x, .y)))

all.coded.imp.open <- reduce(coded.imp.open$data, left_join) # master tibble

coder.pairs <-combn(coded.imp.open$coder, 2, simplify = F) # list of all combinations of coder pairs

categories <- names(bag2.imp.open)[2:13]

kappa2_results <- tibble(
  coder.1 = unlist(map(coder.pairs, ~ .x[1])),
  coder.2 = unlist(map(coder.pairs, ~ .x[2])),
) 
kappa2_results[, categories] <- NA # add column for each target issue 

for(i in 1:nrow(kappa2_results)){
  for(j in name.issues){
    col1 <- paste0(j, ".", kappa2_results$coder.1[i])
    col2 <- paste0(j, ".", kappa2_results$coder.2[i])
    kappa2_results[i, j] <- kappa2(all.coded.imp.open[, c(col1, col2)])$value
  }
}

hand_coder_reliability <- kappa2_results %>%
  filter(coder.1 %in% c("SO", "CN", "CE") & coder.2 %in% c("SO", "CN", "CE")) %>% 
  dplyr::select(where(is.numeric)) %>%
  colMeans()

hand_coder_bag1_reliability <- kappa2_results %>%
  filter(coder.1 %in% c("SO", "CN", "CE") & coder.2 == "bag1") %>% 
  dplyr::select(where(is.numeric)) %>%
  colMeans()

hand_coder_bag2_reliability <- kappa2_results %>%
  filter(coder.1 %in% c("SO", "CN", "CE") & coder.2 == "bag2") %>% 
  dplyr::select(where(is.numeric)) %>%
  colMeans()

cohens_kappa_results <- list(
  hand_coder_reliability = hand_coder_reliability,
  hand_coder_bag1_reliability = hand_coder_bag1_reliability,
  hand_coder_bag2_reliability = hand_coder_bag2_reliability
)

saveRDS(cohens_kappa_results, here::here("data_transformed", "PIPE_paper", "intercoder_reliability_issue_imp.RData"))

# bottom line: Human coders do a generally excellent job, though consistency is variable across issues while exceeding Fleiss's 0.75 threshold for housing, homelessess, immigration, abortion, jobs, education, environment, taxes). Bag1 agrees with human coders more than 75% of time on housing, homelessnes, immigration, abortion, jobs, education, enviro, taxes, though median is 12 points less agreement b/t human coders and machine than human coders and one another. Bag2 is marginally better for median issue (9 points worse than agreement among human coders), but bombs environment.

# resolution: take the version of the machine that performs best on each issue, relative to the human coders.
bag2win <- (hand_coder_bag2_reliability - hand_coder_bag1_reliability) > 0


```

```{r}
#| label: plot_issue_public_open

# This code chunk generates SI Fig. B.3.

title_text <- 24
axis_text <- 18

D_issue_pub_open <- D %>%  
  filter(!is.na(Q5.9)) %>% # exclude any respondents who skipped MIP question 
  mutate(# convert NA response to FALSE so that denominator for the proportions includes everyone who answered Q5.9.
    across(imp.open.housing:imp.open.racism, 
    ~ case_when(is.na(.) ~ FALSE, 
                TRUE ~ .)
  )) %>%
  pivot_longer(imp.open.housing:imp.open.racism, 
                       names_to = "DV", 
                       values_to = "response") %>%
  filter(!is.na(DV)) %>%
  nest(data = -DV) %>%
  mutate(
    model = map(data, ~ lm(response==TRUE ~ 1, data = .)), 
    tidied = map(model, tidy),
    response.freq = map(data, ~.x %>% 
                            group_by(response) %>%
                            summarize(n = n())
                            )
  ) %>%
  unnest(c(tidied, response.freq)) %>%
  filter(response == TRUE) %>%
  mutate(
    proportion = n/length(which(!is.na(D$Q5.9))),
    group = case_when(
      DV %in% c("imp.open.housing", "imp.open.homeless" ) ~ "Housing & Homelessness",
      DV %in% c("imp.open.abortion", 
                "imp.open.crime" , 
                "imp.open.education", 
                "imp.open.environment",  
                "imp.open.racism",
                "imp.open.immigration") ~ "Social Issues",
      DV %in% c("imp.open.jobs", 
                "imp.open.inflation" , 
                "imp.open.taxes", 
                "imp.open.healthcare") ~ "Economic Issues" 
    ))
  
issue_pub_histogram_open <- D_issue_pub_open %>%
   ggplot(aes(
    x = DV,
    y = proportion,
    fill = group
  )) + 
  geom_bar(stat="identity") +
  scale_x_discrete(
    limits = c(
      "imp.open.housing",
      "imp.open.homeless",  
      "imp.open.abortion",
      "imp.open.jobs",
      "imp.open.inflation" ,
      "imp.open.crime",
      "imp.open.education" ,
      "imp.open.environment",
      "imp.open.taxes" ,
      "imp.open.healthcare",
      "imp.open.immigration",
      "imp.open.racism"),
    labels = c(
      "imp.open.housing" = "Cost of housing",
      "imp.open.homeless" = "Homelessness",
      "imp.open.abortion" = "Abortion",      
      "imp.open.jobs" = "Availability of jobs",
      "imp.open.inflation" = "Inflation",
      "imp.open.crime" = "Crime",
      "imp.open.education" = "Education",
      "imp.open.environment" = "Environment",
      "imp.open.taxes" = "Taxes" ,
      "imp.open.healthcare" = "Health care",
      "imp.open.immigration" = "Immigration",
      "imp.open.racism" = "Racism")
    ) + 
  labs(title = "Most Important Issue in State Politics (Free Text)",
       y = "",
       x = "") + 
  scale_fill_brewer("Issue Class",palette = "Dark2") + 
  scale_y_continuous(expand = c(0, 0), 
                     labels=scales::percent, 
                     limits = c(0, 0.14),
                     breaks = seq(0, 0.14, 0.02)
                     ) +
  geom_errorbar(
    aes(ymax = estimate + 1.96*std.error, ymin = estimate - 1.96*std.error),
    width = 0.1) + 
  theme_bw() + 
  theme(#plot.title = element_text(size = 12),
        #axis.title.y = element_text(size = 10),
        axis.text.y= element_text(size = axis_text),
        axis.text.x= element_text(angle = 45, hjust=1, size = axis_text),
        legend.position = "bottom",
        legend.key.size = unit(1, "cm"), 
        legend.title = element_text(size=axis_text), #change legend title font size
        legend.text = element_text(size=(0.67*axis_text)), #change legend text font size
        # text = element_text(size=14),
        plot.margin = ggplot2::margin(0.5, 1, 0.5, 1, "cm"), 
        plot.title = element_text(hjust = 0.5, size = title_text)
        ) 

issue_pub_histogram_open

ggsave(path=here::here("figures","PIPE_paper"), file="issue_public_open.pdf", height=10, width=12)

```

```{r}
#| label: plot_issue_public_open_tenure

# This code chunk generates SI Fig. B.4.

axis_text <- 13

D_issue_pub_open_tenure <- D %>%  
  filter(!is.na(Q5.9) & tenure %in% c("Owner", "Renter")) %>% # exclude any respondents who skipped MIP question 
  mutate(# convert NA response to FALSE so that denominator for the proportions includes everyone who answered Q5.9.
    across(imp.open.housing:imp.open.racism, 
    ~ case_when(is.na(.) ~ FALSE, 
                TRUE ~ .)
  )) %>%
  pivot_longer(imp.open.housing:imp.open.racism, 
                       names_to = "DV", 
                       values_to = "response") %>%
  filter(!is.na(DV)) %>%
  nest(data = -c(DV, tenure)) %>%
  mutate(
    model = map(data, ~ lm(response==TRUE ~ 1, data = .)), 
    tidied = map(model, tidy),
    response.freq = map(data, ~.x %>% 
                            group_by(response) %>%
                            summarize(n = n())
                            )
  ) %>%
  unnest(c(tidied, response.freq)) %>%
  filter(response == TRUE) %>%
  mutate(
    proportion = n/length(which(!is.na(D$Q5.9))),
    proportion = case_when(
      tenure == "Owner" ~ n/length(which(!is.na(D$Q5.9) & D$tenure == "Owner")),
      tenure == "Renter" ~ n/length(which(!is.na(D$Q5.9) & D$tenure == "Renter"))
    ),
    group = case_when(
      DV %in% c("imp.open.housing", "imp.open.homeless" ) ~ "Housing & Homelessness",
      DV %in% c("imp.open.abortion", 
                "imp.open.crime" , 
                "imp.open.education", 
                "imp.open.environment",  
                "imp.open.racism",
                "imp.open.immigration") ~ "Social Issues",
      DV %in% c("imp.open.jobs", 
                "imp.open.inflation" , 
                "imp.open.taxes", 
                "imp.open.healthcare") ~ "Economic Issues" 
    ))
  
issue_pub_histogram_tenure <- D_issue_pub_open_tenure %>%
   mutate( 
     tenure = factor(tenure),
     DV = case_match(DV, 
      "imp.open.housing" ~ "Cost of\nhousing",
      "imp.open.homeless" ~ "Homelessness",
      "imp.open.abortion" ~ "Abortion",      
      "imp.open.jobs" ~ "Availability\nof jobs",
      "imp.open.inflation" ~ "Inflation",
      "imp.open.crime" ~ "Crime",
      "imp.open.education" ~ "Education",
      "imp.open.environment" ~ "Environment",
      "imp.open.taxes" ~ "Taxes" ,
      "imp.open.healthcare" ~ "Health care",
      "imp.open.immigration" ~ "Immigration",
      "imp.open.racism" ~ "Racism"),
     DV = factor(DV, levels = c("Cost of\nhousing",
                                "Homelessness",
                                "Abortion",
                                "Availability\nof jobs",
                                "Inflation",
                                "Crime",
                                "Education",
                                "Environment",
                                "Taxes" ,
                                "Health care",
                                "Immigration",
                                "Racism"))
     ) %>%
  ggplot(aes(x=tenure,
            y=proportion, 
            fill=tenure)) +
  geom_bar(stat="identity", width=0.5) +
  scale_x_discrete(position="top", labels = NULL) +
  scale_y_continuous(expand = c(0, 0), 
                     labels=scales::percent, 
                     limits = c(0, 0.14),
                     breaks = seq(0, 0.14, 0.02)
                     ) +
  scale_fill_brewer(palette = "Dark2", name = NULL)+
  facet_wrap(~ DV, strip.position = "bottom", nrow = 1) +
  labs(title="Most Important Issue in State Politics (Free Text): Renters vs. Owners",
       x = "",
       y = "") +
  coord_cartesian(ylim = c(0, .14)) +
  geom_errorbar(
    aes(ymax = estimate + 1.96*std.error, ymin = estimate - 1.96*std.error),
    width = 0.1) + 
  theme_bw() +  
  theme(strip.background = element_blank(),
        plot.title = element_text(hjust = 0.5, size = title_text),
        legend.position = "bottom",        
        legend.key.size = unit(1, "cm"), 
        legend.title = element_text(size=axis_text), #change legend title font size
        legend.text = element_text(size=(0.9*axis_text)), #change legend text font size
        strip.text.x = element_text(angle = 45, hjust = 0.5, size = axis_text),
        # strip.text.y.left = element_text(angle = 0),
        panel.spacing = unit(0.2, "lines"),
        #plot.title = element_text(size = 10),
        strip.text = element_text(angle = 90),
        #axis.title.x = element_text(size = 8),
        #axis.title.y = element_text(size = 8),
        #legend.title=element_text(size=8), 
        #legend.text=element_text(size=8),
        #axis.text.y= element_text(size = 8),
        #axis.text.x= element_text(size = 8)
        text=element_text(size=graph_text)
        )

issue_pub_histogram_tenure


ggsave(path=here::here("figures","PIPE_paper"), file="issue_public_open_tenure.pdf", height=10, width=12)

```

### Closed-Form Issue Publics

```{r closed-form-issue-public}

# This code chunk generates SI Fig. B.1.

title_text <- 24
axis_text <- 18

D_issue_pub_closed <- D %>%  
  pivot_longer(starts_with("imp.top3"), 
                       names_to = "DV", 
                       values_to = "response") %>%
  filter(!is.na(DV)) %>%
  nest(data = -DV) %>%
  mutate(
    model = map(data, ~ lm(response==1 ~ 1, data = .)), 
    tidied = map(model, tidy),
    response.freq = map(data, ~.x %>% 
                            group_by(response) %>%
                            summarize(n = n())
                            )
  ) %>%
  unnest(c(tidied, response.freq)) %>%
  filter(response == 1) %>%
  mutate(
    proportion = n/length(which(!is.na(D$Q5.12))),
    group = case_when(
      DV %in% c("imp.top3.housing", "imp.top3.homeless" ) ~ "Housing & Homelessness",
      
      DV %in% c("imp.top3.housing", "imp.top3.homeless" ) ~ "Housing & Homelessness",
      DV %in% c("imp.top3.abortion", 
                "imp.top3.crime" , 
                "imp.top3.education", 
                "imp.top3.environment",  
                "imp.top3.racism",
                "imp.top3.immigration") ~ "Social Issues",
      DV %in% c("imp.top3.jobs", 
                "imp.top3.inflation" , 
                "imp.top3.taxes", 
                "imp.top3.healthcare") ~ "Economic Issues",
      DV == "imp.top3.none" ~ "None of these issues"
    ))
  
issue_pub_histogram <- D_issue_pub_closed %>%
   ggplot(aes(
    x = DV,
    y = proportion,
    fill = group
  )) + 
  geom_bar(stat="identity") +
  scale_x_discrete(
    limits = c(
      "imp.top3.housing",
      "imp.top3.homeless",  
      "imp.top3.abortion",
      "imp.top3.jobs",
      "imp.top3.inflation" ,
      "imp.top3.crime",
      "imp.top3.education" ,
      "imp.top3.environment",
      "imp.top3.taxes" ,
      "imp.top3.healthcare",
      "imp.top3.immigration",
      "imp.top3.racism", 
      "imp.top3.none" ),
    labels = c(
      "imp.top3.housing" = "Cost of housing",
      "imp.top3.homeless" = "Homelessness",
      "imp.top3.abortion" = "Abortion",      
      "imp.top3.jobs" = "Availability of jobs",
      "imp.top3.inflation" = "Inflation",
      "imp.top3.crime" = "Crime",
      "imp.top3.education" = "Education",
      "imp.top3.environment" = "Environment",
      "imp.top3.taxes" = "Taxes" ,
      "imp.top3.healthcare" = "Health care",
      "imp.top3.immigration" = "Immigration",
      "imp.top3.racism" = "Racism", 
      "imp.top3.none" = "None of these issues")
    ) + 
  labs(title = "Most Important Issues in State Politics (Closed Form)",
       y = "",
       x = "") + 
  scale_fill_brewer("Issue Class",palette = "Dark2") + 
  scale_y_continuous(expand = c(0, 0), 
                     labels=scales::percent, 
                     limits = c(0, 0.65), 
                     breaks = seq(0, 0.6, by=0.1)
                     ) +
  geom_errorbar(
    aes(ymax = estimate + 1.96*std.error, ymin = estimate - 1.96*std.error),
    width = 0.1) + 
  theme_bw() + 
  theme(#plot.title = element_text(size = 12),
        #axis.title.y = element_text(size = 10),
        axis.text.y= element_text(size = axis_text),
        axis.text.x= element_text(angle = 45, hjust=1, size = axis_text),
        legend.position = "bottom",
        legend.key.size = unit(1, "cm"), 
        legend.title = element_text(size=axis_text), #change legend title font size
        legend.text = element_text(size=(0.67*axis_text)), #change legend text font size
        # text = element_text(size=14),
        plot.margin = ggplot2::margin(0.5, 1, 0.5, 1, "cm"), 
        plot.title = element_text(hjust = 0.5, size = title_text)
        ) 

issue_pub_histogram

ggsave(path=here::here("figures","PIPE_paper"), file="closed-form-issue-public.pdf", height=10, width=12)



```

```{r closed-form-issue-public-tenure}

# This code chunk generates SI Fig. B.2.

axis_text <- 12
graph_text <- 18

D_issue_pub_closed_tenure <- D %>%
  filter(tenure %in% c("Owner", "Renter")) %>%
  pivot_longer(starts_with("imp.top3"), 
                       names_to = "DV", 
                       values_to = "response") %>%
  filter(!is.na(DV)) %>%
  nest(data = -c(DV, tenure)) %>%
  mutate(
    model = map(data, ~ lm(response==1 ~ 1, data = .)), 
    tidied = map(model, tidy),
    response.freq = map(data, ~.x %>% 
                            group_by(response) %>%
                            summarize(n = n())
                            )
  ) %>%
  unnest(c(tidied, response.freq)) %>%
  filter(response == 1) %>%
  mutate(
    proportion = case_when(
      tenure == "Owner" ~ n/length(which(!is.na(D$Q5.12) & D$tenure == "Owner")),
      tenure == "Renter" ~ n/length(which(!is.na(D$Q5.12) & D$tenure == "Renter"))
    ),
    group = case_when(
      DV %in% c("imp.top3.housing", "imp.top3.homeless" ) ~ "Housing & Homelessness",
      
      DV %in% c("imp.top3.housing", "imp.top3.homeless" ) ~ "Housing & Homelessness",
      DV %in% c("imp.top3.abortion", 
                "imp.top3.crime" , 
                "imp.top3.education", 
                "imp.top3.environment",  
                "imp.top3.racism",
                "imp.top3.immigration") ~ "Social Issues",
      DV %in% c("imp.top3.jobs", 
                "imp.top3.inflation" , 
                "imp.top3.taxes", 
                "imp.top3.healthcare") ~ "Economic Issues",
      DV == "imp.top3.none" ~ "None of these issues"
    ))
  
issue_pub_histogram_tenure <- D_issue_pub_closed_tenure %>%
   mutate( 
     tenure = factor(tenure),
     DV = case_match(DV, 
      "imp.top3.housing" ~ "Cost of\nhousing",
      "imp.top3.homeless" ~ "Homelessness",
      "imp.top3.abortion" ~ "Abortion",      
      "imp.top3.jobs" ~ "Availability\nof jobs",
      "imp.top3.inflation" ~ "Inflation",
      "imp.top3.crime" ~ "Crime",
      "imp.top3.education" ~ "Education",
      "imp.top3.environment" ~ "Environment",
      "imp.top3.taxes" ~ "Taxes" ,
      "imp.top3.healthcare" ~ "Health care",
      "imp.top3.immigration" ~ "Immigration",
      "imp.top3.racism" ~ "Racism", 
      "imp.top3.none" ~ "None of\nthese issues"),
     DV = factor(DV, levels = c("Cost of\nhousing",
                                "Homelessness",
                                "Abortion",
                                "Availability\nof jobs",
                                "Inflation",
                                "Crime",
                                "Education",
                                "Environment",
                                "Taxes" ,
                                "Health care",
                                "Immigration",
                                "Racism", 
                                "None of\nthese issues"))
     ) %>%
  ggplot(aes(x=tenure,
            y=proportion, 
            fill=tenure)) +
  geom_bar(stat="identity", width=0.5) +
  scale_x_discrete(position="top", labels = NULL) +
  scale_y_continuous(expand = c(0, 0), labels=scales::percent)+
  scale_fill_brewer(palette = "Dark2", name = NULL)+
  facet_wrap(~ DV, strip.position = "bottom", nrow = 1) +
  labs(title="Most Important Issues in State Politics (Closed Form): Renters vs. Owners",
       x = "",
       y = "") +
  coord_cartesian(ylim = c(0, .6)) +
  geom_errorbar(
    aes(ymax = estimate + 1.96*std.error, ymin = estimate - 1.96*std.error),
    width = 0.1) + 
  theme_bw() +  
  theme(strip.background = element_blank(),
        plot.title = element_text(hjust = 0.5, size = title_text),
        legend.position = "bottom",        
        legend.key.size = unit(1, "cm"), 
        legend.title = element_text(size=axis_text), #change legend title font size
        legend.text = element_text(size=(0.9*axis_text)), #change legend text font size
        strip.text.x = element_text(angle = 55, hjust = 0.5, size = axis_text),
        # strip.text.y.left = element_text(angle = 0),
        panel.spacing = unit(0.2, "lines"),
        #plot.title = element_text(size = 10),
        strip.text = element_text(angle = 90),
        #axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = (0.9*axis_text)),
        #legend.title=element_text(size=8), 
        #legend.text=element_text(size=8),
        #axis.text.y= element_text(size = 8),
        #axis.text.x= element_text(size = 8)
        text=element_text(size=graph_text),
        plot.margin = ggplot2::margin(0.5, 0.5, 0.5, 0.5, "cm"), 
        )

issue_pub_histogram_tenure

ggsave(path=here::here("figures","PIPE_paper"), file="closed-form-issue-public-tenure.pdf", height=10, width=12)


```

### Test-Retest Consistency

```{r}
#| label: test_retest_consistency
#| depends: plot_importance_by_policy_preference

# This code chunk, off plan, generates Table 3 and Table 4 in the paper.

# Q14.2 retests Q11.1 (with order of response options 1 and 2 flipped)
# Q14.4 retests Q12.2 (with order of response options 1 and 2 flipped)
# Q14.6 retests Q13.2 (with order of profiles 1 and 2 flipped) NB: I forgot to remove the "description" column from Q14.6 before the survey fielded, so it's not quite an exact replica of Q13.2. It is harder to read on a phone, and on a computer it may be marginally easier for the respondent home in on the issue that's most important to them. 

# overall retest consistency rates with 95% confidence intervals
D %>% 
  mutate(across(c(Q14.2, Q14.4, Q14.6), ~ recode(., '2' = '1', '1' = '2')),
         `Perceived-Efficacy Retest` =  as.numeric(Q14.2 == Q11.1),
         `Policy-Preference Retest` = as.numeric(Q14.4 == Q12.2),
         `Conjoint Retest` = as.numeric(Q14.6 == Q13.2),
         ) %>%
  pivot_longer(
    cols = matches("\\bRetest\\b"),
    names_to = "which_retest",
    values_to = "retest_pass"
  ) %>% 
  nest(data = - which_retest) %>%
  mutate(model = map(data, ~ lm(retest_pass ~ 1, data = .)),
         tidied = map(model, ~ tidy(., conf.int = T))
         ) %>%
  unnest_wider(tidied) %>%
  dplyr::select(c(which_retest, estimate, conf.low, conf.high)) %>%
  gt(rowname_col = "which_retest") %>%
  tab_header(title = "Test-Retest Consistency by Question Type") %>%
    fmt_number(
    columns = c(estimate, conf.low, conf.high), 
    decimals = 3
  ) %>%
  gtsave(path=here::here("figures","PIPE_paper"), file="test_retest_tab.tex")


# are housing policy preferences less stable than preferences over social and economic policies? maybe a little, but the big differnences are within housing policy type
D %>% 
  mutate(across(c(Q14.2, Q14.4, Q14.6), ~ recode(., '2' = '1', '1' = '2')),
         `Policy-Preference Retest` = as.numeric(Q14.4 == Q12.2),
          i_code_1_type = case_when( # classify issue 1 in the preference battery by type  
            i_code_1 %in% issues$code[issues$policy_type == "Economic"] ~ "econ",
            i_code_1 %in% issues$code[issues$policy_type == "Social"] ~ "social",
            TRUE ~ "housing") 
         ) %>%
  nest(data = - i_code_1_type) %>%
  mutate(model = map(data, ~ lm(`Policy-Preference Retest` ~ 1, data = .)),
         tidied = map(model, ~ tidy(., conf.int = T))
         ) %>%
  unnest_wider(tidied) %>%
  dplyr::select(c(i_code_1_type, estimate, conf.low, conf.high)) %>%
  arrange(i_code_1_type != "housing") %>%  # Move "housing" to top
  gt(rowname_col = "i_code_1_type") %>%
  tab_header(title = "Test-Retest Consistency of Policy Preferences, by Policy Type") %>%
    fmt_number(
    columns = c(estimate, conf.low, conf.high), 
    decimals = 3
  ) %>%
  gtsave(path=here::here("figures","PIPE_paper"), file="test_retest_policy_tab.tex")
  
# the next table disaggregates by housing policy type
D %>% 
  left_join(dplyr::select(issues, c(code, policy_type)), join_by(i_code_1 == code)) %>%
  mutate(
    across(c(Q14.2, Q14.4, Q14.6), ~ recode(., '2' = '1', '1' = '2')),
    `Policy-Preference Retest` = as.numeric(Q14.4 == Q12.2)) %>%
  nest(data = - policy_type) %>%
  mutate(model = map(data, ~ lm(`Policy-Preference Retest` ~ 1, data = .)),
         tidied = map(model, ~ tidy(., conf.int = T))
         ) %>%
  unnest_wider(tidied) %>%
  dplyr::select(c(policy_type, estimate, conf.low, conf.high)) %>%
  arrange(- estimate) %>%   
  gt(rowname_col = "i_code_1_type") %>%
  tab_header(title = "Test-Retest Consistency of Policy Preferences, by Policy Type") %>%
    fmt_number(
    columns = c(estimate, conf.low, conf.high), 
    decimals = 3
  ) %>%
  gtsave(path=here::here("figures","PIPE_paper"), file="test_retest_policy_disagg_tab.tex")
  


```

### Correlations Among Policy Preferences

```{r}
#| label: corplot_policy_prefs_2plots
#| depends: issue-pref-long-df

# This code chunk generates Fig. 4.4 and SI Fig. E.1., respectively, breaking the housing and nonhousing issues into separate plots.  

corr_lab_size <- 5 # fix this in setup block

D_plot <- D_issues %>%
  dplyr::select(y, code, ResponseId, tenure, want.price, pid3.wlean, libcon, imp.top3.housing, imp.open.housing, blame.developer, blame.landlord) %>%
  mutate(
    `Party ID (Democratic)` = case_when(
      pid3.wlean == "dem" ~ 3,
      pid3.wlean == "io" ~ 2,
      pid3.wlean == "rep" ~ 1
    ),
    `Tenure (Renter)` = case_when(
      tenure == "Renter" ~ 1,
      tenure == "Owner" ~ 0,
    ),
    `Wants Lower Hous. Prices` = case_when(
      want.price == "Lower" ~ 3,
      want.price == "Same" ~ 2,
      want.price == "Higher" ~ 1,
    ),
    `Housing MIP` = as.numeric(imp.open.housing),
    `Housing Top-3` = imp.top3.housing,
    `Blame: Developers` = blame.developer,
    `Blame: Landlords` = blame.landlord
  ) %>%
  rename(`Ideology (Liberal)` = libcon) %>%
  left_join(dplyr::select(issues, c(code, plot_shorthand))) %>%
  dplyr::select(- code) %>%
  pivot_wider(names_from = "plot_shorthand",
              values_from = "y") %>%
  dplyr::select( 
    `Housing Top-3`,
    `Housing MIP`,
    `Tenure (Renter)`,
    `Wants Lower Hous. Prices`,
    `Party ID (Democratic)`,
    `Ideology (Liberal)`,
    `Allow More MR Infill`,
    `Allow More MR Sprawl`,
    `Allow More BMR Infill`,
    `Allow More BMR Sprawl`,
    `Increase BMR Spending`,
    `Nondiscretionary Permits`,
    `Reduce Parking Minimums`,
    `Reduce Dev. Fees`,
    `Rent Control`,
    `Property-Tax Control`,
    `Middle-Income IZ`,
    `Low-Income IZ`,
    `More Housing Vouchers`,
    `Renter Tax Break`,
    `Down-Payment Subsidy`,
    `Restrict MR for Future BMR`,
    `Restrict Wall-St. Buyers`,
    `Tax the Rich`,
    `No Middle-Class Tax Cut`,
    `Higher Minimum Wage`,
    `Easier Unionization`,
    `Cap Drug Prices`,
    `More Health Spending`,
    `More Education Spending`,
    `Less Penalty for Anti-White Discrim.`,
    `More Penalty for Anti-POC Discrim.`,
    `More Immigration`,
    `Reduce Criminal Sentences`,
    `Less Police`,
    `More Arts Spending`,
    `Legalize Marijuana`,
    `Reduce Barriers to Abortion`,
    `Reduce Barriers to Voting`,
    `More Gun Control`,    
    `Stricter GHG Regs`,
    `Stricter GMO Regs`,
    `More Nature Preserves`,
    `Higher Gas Taxes`,
    `Stricter Technology Regs`) %>%
  relocate( 
    `Allow More MR Infill`,
    `Allow More MR Sprawl`,
    `Allow More BMR Infill`,
    `Allow More BMR Sprawl`,
    `Increase BMR Spending`,
    `Nondiscretionary Permits`,
    `Reduce Parking Minimums`,
    `Reduce Dev. Fees`,
    `Rent Control`,
    `Property-Tax Control`,
    `Middle-Income IZ`,
    `Low-Income IZ`,
    `More Housing Vouchers`,
    `Renter Tax Break`,
    `Down-Payment Subsidy`,
    `Restrict MR for Future BMR`,
    `Restrict Wall-St. Buyers`,
    `Tax the Rich`,
    `No Middle-Class Tax Cut`,
    `Higher Minimum Wage`,
    `Easier Unionization`,
    `Cap Drug Prices`,
    `More Health Spending`,
    `More Education Spending`,
    `Less Penalty for Anti-White Discrim.`,
    `More Penalty for Anti-POC Discrim.`,
    `More Immigration`,
    `Reduce Criminal Sentences`,
    `Less Police`,
    `More Arts Spending`,
    `Legalize Marijuana`,
    `Reduce Barriers to Abortion`,
    `Reduce Barriers to Voting`,
    `More Gun Control`,    
    `Stricter GHG Regs`,
    `Stricter GMO Regs`,
    `More Nature Preserves`,
    `Higher Gas Taxes`,
    `Stricter Technology Regs`,
    `Tenure (Renter)`,
    `Wants Lower Hous. Prices`,
    `Party ID (Democratic)`,
    `Ideology (Liberal)`,
    `Housing Top-3`,
    `Housing MIP`)

D_plot_hous <- D_plot %>%
  dplyr::select(`Allow More MR Infill`:`Restrict Wall-St. Buyers`, `Tenure (Renter)`:`Housing MIP`)

pmat <- D_plot_hous %>% cor_pmat()

D_plot_hous %>%
  cor(use="pairwise.complete.obs") %>% 
  ggcorrplot(type="lower",
             lab=T, 
             p.mat=pmat,
             insig="pch", 
             lab_size=corr_lab_size) +
  labs(title = "",
       subtitle = "") + 
  theme(legend.position="none", 
        text=element_text(size=(0.7*main_text)), 
        axis.text.x = element_text(size=(0.9*axis_text)),
        axis.text.y = element_text(size=(0.9*axis_text))
        )

ggsave(path=here::here("figures","PIPE_paper"), file="corplot_policy_prefs_hous.pdf", height=16, width=16) #  Fig. 4.4

# now plot the nonhousing matrix
D_plot_nonhous <- D_plot %>%
  dplyr::select(`Tax the Rich`:`Housing MIP`)

pmat <- D_plot_nonhous %>% cor_pmat()

D_plot_nonhous %>%
  cor(use="pairwise.complete.obs") %>% 
  ggcorrplot(type="lower",
             lab=T, 
             p.mat=pmat,
             insig="pch", 
             lab_size=corr_lab_size) +
  labs(title = "",
       subtitle = "") + 
  theme(legend.position="none", 
        text=element_text(size=(0.6*main_text)), 
        axis.text.x = element_text(size=(0.6*axis_text)),
        axis.text.y = element_text(size=(0.6*axis_text))
        )

ggsave(path=here::here("figures","PIPE_paper"), file="corplot_policy_prefs_nonhous.pdf", height=16, width=16)  # SI Fig. E.1

# # check number of observations per cor coefficient
# cor_obs <- psych::pairwiseCount(D_plot, diagonal = FALSE)  # count number of observations per pair
# cor_obs <- cor_obs[1:39,1:39] # drop non-policy pairs
# quantile(cor_obs, na.rm = T) # 25th, 75th quantiles are 300, 322


```

# Appendix: Survey Demographics

Table @demo_table compares the demographics of respondents to this survey (fielded in March 2024) to the demographics of the overall U.S. population as of the 2022 American Community Survey 5-year estimates (the latest available.)

```{r}
#| label: demo_table

# This generates Table 1 in the SI.

library(tidycensus)
library(tidyverse)
library(googlesheets4)
library(zipcodeR)
library(modelsummary)
library(berryFunctions)

##Bring in zip codes and filter out 
zips <- read_sheet("https://docs.google.com/spreadsheets/d/1wX7yIZHllzOXXfyM7QKv2MRTjfGK86F4ZNMyLF8j-lU/edit#gid=647139815") %>%
  filter(rural_less_500==0)

####Read in data by zip to match our survey categories
##first variables

vars <- load_variables(2022, "acs5")

cen_edu <- get_acs("zcta", variables=c(ba="B06009_005",
                                       ma="B06009_006"), summary_var = "B06009_001", year=2022) %>% 
  group_by(GEOID) %>% 
  summarise(summary_est=mean(summary_est, na.rm=T),
    pct_ba=sum(estimate[variable %in% c("ba","ma")], na.rm=T)/summary_est) 

cen_race <- get_acs("zcta", variables=c(White="B01001H_001",
                                        Black="B01001B_001",
                                        Asian="B01001D_001",
                                        Hispanic="B01001I_001",
                                        `Multi/Other`=c("B01001G_001",
                                                      "B01001C_001",
                                                      "B01001E_001",
                                                      "B01001F_001")),
                    summary_var = "B01001_001", year=2022)

##Other race double counts from something. Drop it when calculating other

sum_cen_race <- cen_race %>% 
  group_by(GEOID) %>% 
  summarise(summary_est=mean(summary_est),
            pct_white=estimate[variable=="White"]/summary_est,
            pct_black=estimate[variable=="Black"]/summary_est,
            pct_asian=estimate[variable=="Asian"]/summary_est, 
            pct_latino=estimate[variable=="Hispanic"]/summary_est, 
            pct_other=sum(estimate[variable %in% c(paste0("Multi/Other",1:3))])/summary_est) 


cen_demos <- get_acs("zcta", variables=c(male="B01001_002", 
                                         ba="B06009_005",
                                         ma="B06009_006",

age_18_under = paste0("B01001_0",c(str_pad(03:06, width = 2, side="left", pad = 0),
                                                                       27:30)),                                       
                                                                                  age_18_29=paste0("B01001_0",c(str_pad(07:11, width = 2, side="left", pad = 0),
                                                                       31:35)),
                                         age_30_44=paste0("B01001_0",c(str_pad(12:14, width = 2, side="left", pad = 0),
                                                                       36:38)),
                                         age_45_64=paste0("B01001_0",c(str_pad(15:19, width = 2, side="left", pad = 0),
                                                                       39:43)),
                                         age_65plus=paste0("B01001_0",c(str_pad(20:25, width = 2, side="left", pad = 0),
                                                                       44:49))), 
                                         summary_var = "B01001_001", year=2022) 

sum_cen_demos <- cen_demos %>% group_by(GEOID) %>% 
  summarise(summary_est=mean(summary_est),
            pct_male=estimate[variable=="male"]/summary_est, 
            age_18_under = sum(estimate[variable %in% 
 c(paste0("age_18_under",1:8))])/summary_est, 
            age_18_29=sum(estimate[variable %in% 
                                     c(paste0("age_18_29",1:10))])/summary_est,
            age_30_44=sum(estimate[variable %in% 
                                     c(paste0("age_30_44",1:6))])/summary_est,
            age_45_64=sum(estimate[variable %in%
                                    c(paste0("age_45_64",1:10))])/summary_est,
            age_65plus=sum(estimate[variable %in%
                                     c(paste0("age_65plus",1:12))])/summary_est)

cen_demos_inc <- get_acs("zcta", variables=c(inc_bracket=paste0("B19001_0", str_pad(02:17, width = 2, side="left", pad = 0))), 
                         summary_var = "B19001_001", year=2022) %>% 
  group_by(GEOID) %>% 
  summarise(summary_est=mean(summary_est,na.rm=T),
            less_30k = sum(estimate[variable %in% c(paste0("inc_bracket",1:5))])/summary_est,
            `30_39`=sum(estimate[variable %in% c(paste0("inc_bracket",6:7))])/summary_est,
            `40_49`=sum(estimate[variable %in% c(paste0("inc_bracket",8:9))])/summary_est,
            `50_59`=sum(estimate[variable %in% c(paste0("inc_bracket",10))])/summary_est,
            `60_74`=sum(estimate[variable %in% c(paste0("inc_bracket",11))])/summary_est,
            `75_99`=sum(estimate[variable %in% c(paste0("inc_bracket",12))])/summary_est,
            `100_124`=sum(estimate[variable %in% c(paste0("inc_bracket",13))])/summary_est,
            `more_124`=sum(estimate[variable %in% c(paste0("inc_bracket",14:16))]/summary_est)) 


cen_demos_homeown <- get_acs("zcta", variables=c(owner_occ="B25003_002"), 
                     summary_var = "B25003_001", year=2022) %>% 
  group_by(GEOID) %>% 
  summarise(pct_homeowners=estimate/summary_est)

cen_demos_housecost <- get_acs("zcta", 
                               variables=c(cost=
                                 paste0("B25104_0", str_pad(02:17, width = 2, side="left", pad = 0))), 
                               summary_var = "B25104_001", year=2022) %>% group_by(GEOID) %>% 
  summarise(summary_est=mean(summary_est),
            less_300=sum(estimate[variable %in% c("cost1","cost2","cost3")])/summary_est,
            `300_499`=sum(estimate[variable %in% c("cost4","cost5")])/summary_est,
            `500_999`=sum(estimate[variable %in% c("cost6","cost7","cost8","cost9","cost10")])/summary_est,
            `1000_1499`=sum(estimate[variable %in% c("cost11")])/summary_est,
            `1500_1999`=sum(estimate[variable %in% c("cost12")])/summary_est,
            `2000_2499`=sum(estimate[variable %in% c("cost13")])/summary_est,
            `2500_2999`=sum(estimate[variable %in% c("cost14")])/summary_est,
            `3000_more`=sum(estimate[variable %in% c("cost15")])/summary_est,
            no_cash_rent=sum(estimate[variable %in% c("cost16")])/summary_est)

###merge census demographics

census_demos <- inner_join(cen_edu, sum_cen_demos, by="GEOID") %>% 
  inner_join(cen_demos_inc, by="GEOID")  %>% 
  inner_join(cen_demos_homeown, by="GEOID") %>% 
  inner_join(cen_demos_housecost, by="GEOID") %>% 
  inner_join(sum_cen_race, by="GEOID") %>% 
  dplyr::select(-contains("summary_est")) %>% 
  filter(GEOID %in% zips$GEOID20) %>% 
  rename_at(vars(9:16),function(x) paste0("inc_",x)) %>% 
  rename_at(vars(18:26), function(x) paste0("home_cost_",x)) %>% 
  inner_join(sum_cen_race[,c("GEOID","summary_est")], by="GEOID") %>% 
  mutate(w=summary_est/sum(summary_est))

##Print in format to include in final tex table

table_order <- c("pct_male","pct_homeowners","pct_ba","age_18_under","age_18_29", "age_30_44", "age_45_64", "age_65plus", "pct_asian","pct_black","pct_latino","pct_other","pct_white",
                 "inc_less_30k", "inc_30_39", "inc_40_49", "inc_50_59", "inc_60_74", "inc_75_99", "inc_100_124", "inc_more_124", "home_cost_less_300", "home_cost_300_499", 
"home_cost_500_999", "home_cost_1000_1499", "home_cost_1500_1999", 
"home_cost_2000_2499", "home_cost_2500_2999", "home_cost_3000_more", 
"home_cost_no_cash_rent")

us_census_demos <- census_demos %>% mutate_at(vars(pct_ba:pct_other),function(x) x*100) %>% 
  data.frame() %>% 
  datasummary(All(.)~ weighted.mean * Arguments(w = w, na.rm=T) + SD,output="data.frame", data=.) %>% 
  rename("Census Category"=1, "Percent"=2) %>% 
  arrange(factor(`Census Category`, levels=table_order)) %>% 
  slice_head(n=-2)


###Survey Demographics

surv <- D %>% 
  dplyr::select(age.cat, male,race.eth,income, ownhome, has.ba, housing.costs) %>% 
  mutate(income= case_when(income==1~"Less Than $30,000",
                           income==2~"$30,000 – 39,999",
                           income==3~"$40,000 – 49,999",
                           income==4~"$50,000 – 59,999",
                           income==5~"$60,000 – 69,999",
                           income==6~"$70,000 – 79,999",
                           income==7~"$80,000 – 89,999",
                           income==8~"$90,000 – 99,999",
                           income==9~"$100,000 – 109,999",
                           income==10~"$110,000 – 119,999",
                           income==11~"More than $120,000",
                           is.na(income) ~ "NA") %>% factor(levels=c("Less Than $30,000",
                                                                                "$30,000 – 39,999",
                                                                                "$40,000 – 49,999",
                                                                                "$50,000 – 59,999",
                                                                                "$60,000 – 69,999",
                                                                                "$70,000 – 79,999",
                                                                                "$80,000 – 89,999",
                                                                                "$90,000 – 99,999",
                                                                                "$100,000 – 109,999",
                                                                                "$110,000 – 119,999",
                                                                                "More than $120,000",
                                                                     "NA")),
         housing.costs=case_when(housing.costs==1 ~ "Less than $250",
                                 housing.costs==2 ~ "$500",
                                 housing.costs==3 ~ "$750",
                                 housing.costs==4 ~ "$1,000",
                                 housing.costs==5 ~ "$1,500",
                                 housing.costs==6 ~ "$2,000",
                                 housing.costs==7 ~ "$2,500",
                                 housing.costs==8 ~ "$3,000",
                                 housing.costs==9 ~ "$4,000",
                                 housing.costs==10 ~ "$5,000",
                                 housing.costs==11 ~ "$7,500",
                                 housing.costs==12 ~ "$10,000",
                                 housing.costs==13 ~ "$15,000",
                                 housing.costs==14 ~ "More than $20,000",
                                 is.na(housing.costs) ~ "NA")%>% factor(levels=c("Less than $250",
                                                                                             "$500",
                                                                                             "$750",
                                                                                             "$1,000",
                                                                                             "$1,500",
                                                                                             "$2,000",
                                                                                             "$2,500",
                                                                                             "$3,000",
                                                                                             "$4,000",
                                                                                             "$5,000",
                                                                                             "$7,500",
                                                                                             "$10,000",
                                                                                             "$15,000",
                                                                                             "More than $20,000",
                                                                                 "NA"))) %>%
  datasummary((`Male`=male)+(`Homeowner`=ownhome)+(`Has B.A. or Above`=has.ba)+(`Age`=factor(age.cat))+
                (`Race/Ethnicity`=factor(race.eth))+(`Yearly Income`=factor(income))+
                (`Monthly Housing Costs`=factor(housing.costs))~Mean+SD+N+Percent("col") , fmt=2,data=., 
              output="data.frame") %>% 
  mutate("Percent" = ifelse(Mean=="", as.numeric(Percent), as.numeric(Mean)*100)) %>% 
  dplyr::select(1:2,"Percent") %>% 
   insertRows(.,c(4),new="")

  ###You need to add a row for $15,000 housing costs because there are none and modelsummary drops it which means you can't combine with other surveys. This is dumb.
  # new_row <- data.frame(" "=" ","  "="$15,000","Percent"=0) %>% 
  #   rename_all(~colnames(surv))
  # 
  # surv <- surv %>% dplyr::slice(.,1:36) %>% 
  #   bind_rows(new_row) %>% bind_rows(dplyr::slice(surv,37:n()))
  # 


####Finagle Census Categeory structure so that it fits with the survey data format. 
census_for_table <- us_census_demos %>% mutate(
  `Census Category` = case_when(
    `Census Category`=="age_18_under" ~ "Under 18",
    `Census Category`=="inc_60_74" ~ "$60,000 - $74,000",
      `Census Category`=="inc_75_99" ~ "$75,000 - $99,000",
    `Census Category`=="inc_100_124" ~ "$100,000 - $124,000",
    `Census Category`=="inc_more_124" ~ "$124,000 or more",
    `Census Category`=="home_cost_less_300" ~ "Less than $300",
    `Census Category`=="home_cost_300_499" ~ "$300 - $499",
    `Census Category`=="home_cost_500_999" ~ "$500 - $999",
    `Census Category`=="home_cost_1000_1499" ~ "$1,000 - $1,499",
    `Census Category`=="home_cost_1500_1999" ~ "$1,500 - $1,999",
    `Census Category`=="home_cost_2000_2499" ~ "$2,000 - $2,499",
    `Census Category`=="home_cost_2500_2999" ~ "$2,500 - $2,999",
    `Census Category`=="home_cost_3000_more" ~ "$3,000 or more",
    `Census Category`=="home_cost_no_cash_rent" ~ "No cash rent",
    TRUE ~ "")) %>% 
  insertRows(.,c(18,20,22,24,33,34,35,36,37,38),new="")

###Print survey and census demos as tex file
##Drop the SD for the census. It's pointless.

bind_cols(surv, census_for_table %>% dplyr::select(-SD)) %>%  knitr::kable(.,format="latex",booktabs=T, longtable=T, 
                                                    col.names = c("","","Percent","Census Category","Percent")) %>% 
  add_header_above(c(" "=2,"Survey"=1,"U.S. Census"=2), align = c("l", "r", "l","r")) %>% 
  kable_styling(full_width = F, font_size = 8) %>% 
  save_kable(.,file=here("figures","PIPE_paper","demographics_table.tex"))


  

```

```{r}
#| label: cces_demo_table

# This generates Table 2 in the SI.

library(tidyverse)
library(here)
library(googlesheets4)
library(modelsummary)
library(kableExtra)
options(dplyr.width=Inf)

##load zips used in survey
zips <- read_sheet("https://docs.google.com/spreadsheets/d/1wX7yIZHllzOXXfyM7QKv2MRTjfGK86F4ZNMyLF8j-lU/edit#gid=647139815") %>%
  filter(rural_less_500==0)


##load ces data and subset to just zips in survey
#Drop 8,394 people from rural zips. Total 51,606 rows. 
ces <- read_csv(here("data_raw","dataverse_files","CCES22_Common_OUTPUT_vv_topost.csv")) %>% 
  filter(lookupzip %in% zips$GEOID20)

####
ces %>% mutate(age = 2022 - birthyr,
               age_18_29 = ifelse(age <=29, 1,0),
               age_30_44 = ifelse(age >=30 & age<=44, 1,0),
               age_45_64 = ifelse(age >=45 & age<=64, 1,0),
               age_65_plus = ifelse(age >=65, 1,0),
               dem = ifelse(pid7 %in% c(1,2,5),1,0),
               gop = ifelse(pid7 %in% c(3,4,6),1,0),
               other_party = ifelse(pid7 > 6,1,0),
               white= ifelse(race==1, 1,0),
               black = ifelse(race==2, 1,0),
               latino =ifelse(race==3, 1,0),
               asian = ifelse(race==4, 1,0),
               other_race = ifelse(race %in% 5:8, 1,0),
               male = ifelse(gender4 == 1, 1,0),
               ba_plus = ifelse(educ %in% 5:6,1,0),
               inc_less_30k = ifelse(faminc_new %in% 1:3,1,0),
               inc_30_39 = ifelse(faminc_new==4,1,0),
               inc_40_49 = ifelse(faminc_new==5,1,0),
               inc_50_59 = ifelse(faminc_new==6,1,0),
               inc_60_69 = ifelse(faminc_new==7,1,0),
               inc_70_79 = ifelse(faminc_new==8,1,0),
               inc_80_99 = ifelse(faminc_new==9,1,0),
               inc_100_119 = ifelse(faminc_new==10,1,0),
               inc_120_plus = ifelse(faminc_new > 10,1,0),) %>% 
  datasummary((`Pct. Aged 18 to 29`=age_18_29) + 
                (`Pct. Aged 30 to 44`=age_30_44) +
                (`Pct. Aged 45 to 64`=age_45_64) +
                (`Pct. Aged 65 and Above`=age_65_plus) +
                (` Pct. Democrat`=dem) + (`Pct. GOP`=gop) + 
                (`Pct. Other Party`=other_party) + (`Pct. White`=white) +
                (`Pct. Black`=black) + (`Pct. Latino`=latino) + 
                (`Pct. Asian`=asian) + (`Pct. Other Race`=other_race) +
                (`Pct. Male`=male) + (`Pct. BA or Above`=ba_plus) + 
                (`Pct. Family Income Less than $30,000`= inc_less_30k) + 
                (`Pct. Family Income $30,000-$39,999`=inc_30_39) +
                (`Pct. Family Income $40,000-$49,999`=inc_40_49) + 
                (`Pct. Family Income $50,000-$59,999`=inc_50_59) +
                (`Pct. Family Income $60,000-$69,999`=inc_60_69) + 
                (`Pct. Family Income $70,000-$79,999`=inc_70_79) +
                (`Pct. Family Income $80,000-$89,999`=inc_80_99) + 
                (`Pct. Family Income $100,000-$119,999`=inc_100_119) +
                (`Pct. Family Income $120,000 and Above`=inc_120_plus) ~ weighted.mean * Arguments(w = commonweight, na.rm=T),fmt=4,output="data.frame", data=.) %>% 
  mutate(weighted.mean = as.numeric(weighted.mean) *100) %>% 
  knitr::kable(.,format="latex",booktabs=T, longtable=T, 
               col.names = c("CCES Category","Value")) %>% 
  kable_styling(full_width =T) %>% 
  save_kable(.,file=here("figures","PIPE_paper","cces_table.tex"))


```

# Appendix: Pilot Study Results

The appendix provides results from a pilot study that we appended to a survey fielded in May of 2023 on a nationally representative sample of urban and suburban respondents. Respondents ($N \approx 2000$) were randomly assigned to one of two batteries of housing-policy questions, which varied in their complexity. (We include only the simpler battery here, which generated somewhat more variance in win rates.)

Respondents made 9 pairwise efficacy judgments of a random subset of housing policies, and were then asked whether they personally support or oppose 5 policies. In the efficacy block, we randomized the target population and piped in the respndent's state: "Which policy would be more effective for helping \${e://Field/target_pop} in \${e://Field/State} get housing they can afford?." \${e://Field/target_pop} is either "low-income households" or "middle-income households," and \${e://Field/State} is always the respondent's state. In the figures below, we pool over target populations, and in all but the first figure, we lump the policies into categories. The pilot did not include any measures of issue importance.

@fig-pilot-efficacy-by-policy shows the average pairwise win rates of each policy, as well as the convention we used to lump policies into categories. The main takeaway is that the policies' average perceived efficacy is highly variable, with average win rates ranging from lows of about 30-35% for policies like building a border wall, banning short-term vacation rentals, and taxing vacancies, to highs of 65-75% for policies like rent control, inclusionary housing mandates, and subsidies for first-time homebuyers. On average, price controls and demand subsidies are regarded as more effective than supply-side policies. Figure @fig-pilot-efficacy-by-category shows win rates by grouped policy type, and Figures @fig-pilot-efficacy-by-tenure, @fig-pilot-efficacy-by-price-desire, and @fig-pilot-efficacy-by-party provide the corresponding grouped win rates by tenure (homeowner vs. renter), desired future home prices and rents in the respondent's city (lower or not-lower than today), and party identification, respectively.

Finally, @fig-pilot-win-pct-vs-avg-support shows the relationship in the full sample between support for policies in a category and the rated efficacy of the policies in that category. In general, policy categories that are perceived as more effective are also more likely to be supported.

## Efficacy Judgments (Pairwise Win Rates)

```{r}
#| label: fig-pilot-efficacy-by-category
#| fig-cap: "Win Percentage Overall, Grouped by Policy Category. Data from Pilot Survey."

# This is SI Fig. G.2.

t_data <- pilot$D %>% dplyr::select(contains("efficacy_CN"), ResponseId) %>% 
  drop_na()

t_data <- compare_columns(t_data, "CN")

##Reshape the h2h efficacy ratings and then calculate win rates with se's that account for clustering. 

cn_wp <- win_rate_cluster(t_data, "CN") %>% 
  lm_robust(y ~ group-1, data=., clusters=id) %>% 
  tidy(.,conf.int=T) %>% 
  mutate(term=str_replace(term, "^group",""),
         est_rank=rank(-estimate))
 

###Reorder the policy areas based on win percentage

cn_order <- cn_wp %>% arrange(desc(est_rank))

##Plot the output

cn_overall <- cn_wp %>% 
  mutate(Element=factor(term, levels=cn_order$term)) %>% 
  ggplot(aes(x=reorder(str_wrap(term,width=70),estimate), y=estimate, color=term)) + geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=.4) +
  scale_x_discrete("") + 
  scale_y_continuous("Win Percentage", label=scales::percent) + 
  scale_fill_brewer("Policy Type", palette = "Paired") +
  coord_flip() +
  geom_hline(yintercept=.5, linetype="dashed")+
  theme_bw() +
  theme(strip.background = element_blank(),
        legend.position = "none",
        #strip.text.y.left = element_text(angle = 90),
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        #axis.title.x = element_text(size = 8),
        #axis.title.y = element_text(size = 8), #legend.title=element_text(size=8),
        #legend.title=element_text(size=graph_text),
        #legend.text=element_text(size=graph_text),
        #axis.text.y= element_text(size = axis_text),
        #axis.text.x= element_text(size = axis_text),
        text=element_text(size=graph_text),
        plot.title = element_text(size=graph_text, hjust = 0.5))


ggsave(path=here::here("figures","PIPE_paper"), file="group_win_percentages_overall_CN.pdf", height=10, width=16)


```

```{r}
#| label: fig-pilot-efficacy-by-tenure
#| fig-cap: "Pairwise Win Percentage by Policy Category: Homeowners vs. Renters. Data from Pilot Survey."

t_data <- pilot$D %>% dplyr::select(contains("efficacy_CN"), tenure, ResponseId) %>% 
  drop_na()

t_data <- t_data %>% split(f=as.factor(.$tenure)) %>% 
  map(~compare_columns(.,"CN"))

cn_wp <- t_data %>% 
  map(~win_rate_cluster(., "CN")) %>% 
  map(~lm_robust(y ~ group-1, data=., clusters=id)) %>% 
  map(~tidy(.,conf.int=T)) 

cn_tenure <- cn_wp %>% bind_rows(.,.id="tenure") %>% 
  filter(tenure!="Other") %>% 
  mutate(term=str_replace(term, "^group",""),
         est_rank=rank(-estimate)) %>% 
ggplot(aes(x=reorder(term,desc(term)), y=estimate, color=term, shape=tenure)) + 
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=.4,
                position=position_dodge(width=.9)) +
  geom_point(position=position_dodge(width=.9)) +
  scale_x_discrete("") + 
  scale_y_continuous("Pairwise Win Percentage", labels=scales::percent) + 
  geom_hline(yintercept = .5, linetype="dashed") +
  scale_fill_brewer("Policy Type", palette = "Paired") +
  scale_shape_discrete("Homeownership Status", labels=c("Owner","Renter")) +
  coord_flip() +
  theme_bw() +
  theme(strip.background = element_blank(),
        legend.position = "bottom",
        #strip.text.y.left = element_text(angle = 90),
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        #axis.title.x = element_text(size = 8),
        #axis.title.y = element_text(size = 8), #legend.title=element_text(size=8),
        #legend.title=element_text(size=main_text),
        #legend.text=element_text(size=main_text),
        #axis.text.y= element_text(size = axis_text),
        #axis.text.x= element_text(size = axis_text),
        text=element_text(size=graph_text),
        plot.title = element_text(size=main_text, hjust = 0.5))+
  guides(color=FALSE)

ggsave(path=here::here("figures","PIPE_paper"), file="group_win_percentages_tenure_CN.pdf", height=10, width=16)
```

```{r}
#| label: fig-pilot-efficacy-by-party
#| fig-cap: "Pairwise Win Percentage by Policy Category: Party ID. Data from Pilot Survey."

###Clayton version

###Using leaners. 

t_data <- pilot$D %>% dplyr::select(contains("efficacy_CN"), pid3.wlean, ResponseId) %>% 
  drop_na()

t_data <- t_data %>% split(f=as.factor(.$pid3.wlean)) %>% 
  map(~compare_columns(.,"CN"))

cn_wp <- t_data %>% 
  map(~win_rate_cluster(., "CN")) %>% 
  map(~lm_robust(y ~ group-1, data=., clusters=id)) %>% 
  map(~tidy(.,conf.int=T)) %>% 
  bind_rows(.id="party_id") 

cn_wp %>% 
  mutate(term=str_replace(term, "^group",""),
         est_rank=rank(-estimate)) %>% 
  ggplot(aes(x=reorder(term,desc(term)), y=estimate, color=term, shape=party_id)) + 
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=.4,
                position=position_dodge(width=.9)) +
  geom_point(position=position_dodge(width=.9)) +
  scale_x_discrete("") + 
  scale_y_continuous("Pairwise Win Percentage", labels=scales::percent) + 
  geom_hline(yintercept = .5, linetype="dashed") +
  scale_fill_brewer("Policy Type", palette = "Paired") +
  scale_shape_discrete("Party ID", labels=c("Dem","Ind","GOP")) +
  coord_flip() +
  theme_bw() +
  theme(strip.background = element_blank(),
        legend.position = "bottom",
        #strip.text.y.left = element_text(angle = 90),
        panel.spacing = unit(0.0, "lines"),
        strip.text.y = element_text(angle = 0, hjust = 0, size = graph_text),
        #axis.title.x = element_text(size = 8),
        #axis.title.y = element_text(size = 8), #legend.title=element_text(size=8),
        #legend.title=element_text(size=main_text),
        #legend.text=element_text(size=main_text),
        #axis.text.y= element_text(size = axis_text),
        #axis.text.x= element_text(size = axis_text),
        text=element_text(size=graph_text),
        plot.title = element_text(size=main_text, hjust = 0.5))+
  guides(color=FALSE)


ggsave(path=here::here("figures","PIPE_paper"), file="group_win_percentages_party_id_CN", height=10, width=16)

```

## Support for Housing and Nonhousing Policies

```{r}
#| label: pilot_issue_position_all_responses
#| eval: false

# This is SI Fig. G.1. It shows the full distribution of responses to the issue-position question. Statements were support/oppose on a 5 point scale, rather than which position is closer to your views with a DK option in the middle. Expect acquiescence bias. The ordering of the facets is in descending order of the popularity of the nearest corresponding item on our March 2024 survey, except that non-housing items and items not included on the March 2024 survey come at the end.

D_support <- pilot$D %>% 
  pivot_longer(c(`Limit annual property tax increases`:`Reduce gas taxes`), 
                       names_to = "DV", 
                       values_to = "response") %>%
  filter(!is.na(response)) %>%
  nest(data = - DV) %>%
  mutate(
    model = map(data, ~ lm(response>3 ~ 1, data = .)), 
    tidied = map(model, tidy, conf.int = T),
    response.freq = map(data, ~.x %>% 
                            group_by(response) %>%
                            summarize(n = n())
                            )
  ) %>%
  unnest(c(tidied, response.freq)) %>%
  dplyr::select(-data)

# prepare data and facet labels
# temp_D <- pilot$D %>% 
#  left_join(issues %>% dplyr::select(plot_shorthand, lib_position, con_position), by = c("policy" = "plot_shorthand")) %>%
#  mutate(facet_label = lib_position) # I tried to fit lib_position and con_position into the label but just couldn't do it.

D_support %>%
  mutate(constant = "k",
         facet.label = 
           factor(DV, levels = c(
                "Limit annual property tax increases",
                "Prohibit local governments from raising a homeowner's property taxes by more than 2% per year",
                "Adopt laws limiting rent increases",
                "Prohibit landlords from raising a tenant's rent by more than 2% per year",
                "Prohibit landlords from making a new tenant pay higher rents than the previous tenant",
                "Give first-time homebuyers a no-interest government loan for their down payment",
                "Prohibit investment banks and corporations from buying homes",
                "Prohibit foreign investors from buying homes",
                "Prohibit short-term vacation rentals (AirBnB, VRBO)" ,
                "Require apartment developers to rent some units to middle-income people",
                "Require developers of housing projects to sell or rent at least 10% (1 in 10) of the new homes to middle-income people",
                "Require developers of housing projects to sell or rent at least 50% (one half) of the new homes to middle-income people",
                "Require apartment developers to rent some units to low-income people",
                "Require developers of housing projects to sell or rent at least 10% (1 in 10) of the new homes to low-income people",
                "Require developers of housing projects to sell or rent at least 50% (one half) of the new homes to low-income people",
                "Give renters a tax break",
                "Guarantee approval of housing developments that follow rules" ,
                "Make local governments approve housing development projects that comply with objective standards",
                "Require local governments to promptly approve or reject housing proposals",
                "Require local governments to approve or deny housing development projects quickly",
                "End special fees and taxes on housing development",
                "Increase public spending on low-income housing projects",
                "Increase public spending on low-income rent vouchers (\"Section 8\")", 
                "Have the government buy up property to develop later as affordable housing" ,
                "Build more single-family homes on open land",
                "Change zoning to allow more single-family homes on open land",
                "Build more apartments in single-family neighborhoods",
                "Change zoning to allow more apartments in existing neighborhoods",
                "Change zoning to allow homeowners to build an \"accessory dwelling unit\" (small house or apartment) in their backyard" ,
                "Reduce environmental regulations on housing development",
                "Prohibit developers of new housing from tearing down old apartment buildings",
                "Prohibit developers from tearing down old apartment buildings if a tenant lived there recently" ,
                "Require developers of new housing who tear down buildings to give prior low-income residents of those buildings a place to live",
                "Require developers who tear down an apartment building to provide its low-income tenants with other housing at an affordable rent",
                "Prevent misuse of environmental lawsuits by opponents of housing proposals",
                "Make people who file frivolous lawsuits against housing development pay for the costs they impose on others" ,
                "Tax landlords if they leave an apartment vacant" ,
                "Tax homeowners if they leave their homes unoccupied",
                "Tax the profits of \"flippers\"--those who fix up and sell older properties",
                "Make developers of office and commercial buildings pay for housing that future workers will need",
                "Build a border wall to keep illegal immigrants out of the United States" ,
                "Pass \"Medicare for All\" to give health insurance to every American" ,
                "Raise the minimum wage" ,
                "Reduce gas taxes" 
           ))
         ) %>%
  ggplot(aes(x = constant, y = n, fill = factor(response))) +
  scale_y_continuous(labels=scales::percent, expand=c(0,0), breaks = seq(0, 1, by = 0.2)) + 
  scale_x_discrete(position="top") +
  geom_col(position = "fill", width=0.75) +
  geom_errorbar(aes(ymax = conf.high, ymin = conf.low), width = 0.1) +
  scale_fill_manual("Support for \nPolicy",
                    breaks = c("5", "4", "3", "2", "1"), 
                    values = c("darkblue", "lightblue", "lightgrey", "pink", "red"),
                    labels = c("Strongly\nsupport", "Somewhat\nsupport", "Neither support \nnor oppose",  "Somewhat\noppose", "Strongly\noppose")) +
     theme(strip.background = element_blank(),
           legend.position = "bottom",
           legend.title = element_text(size = 9), 
           legend.text = element_text(size = 7),
           plot.margin = ggplot2::margin(0.5, 1, 0.5, 1, "cm"),  
           axis.text.x = element_text(size = 8), # this is the printed x axis after coor_flip() below
           axis.text.y = element_blank(),
           strip.text = element_text(size = 7, hjust = 0, vjust = 0, margin = margin(5, 5, 2, 1)),
           panel.spacing.x = unit(1.25, "lines"),
           panel.spacing.y = unit(0, "lines"),
           # axis.ticks.x = element_blank(),
           axis.ticks.y = element_blank(),
    #     panel.background=element_rect(fill="white", color="black"),
    #     panel.grid.major = element_line(color = 'grey', linetype = 'dotted'),
    #     panel.spacing = unit(0.2, "lines"),
    #     plot.title = element_text(size = 10),
    #     text=element_text(size=10)
         ) +
  coord_flip(ylim = c(0, 1)) +
  facet_wrap(~ facet.label, ncol = 2, dir = "v", labeller = label_wrap_gen(67)) + 
  labs(x = NULL, y = NULL) 
  
ggsave(path=here::here("figures","PIPE_paper"), file="pilot_issue_position_all_responses_v_.pdf", height=9, width=7, units = "in")


```

```{r}
#| label: fig_pilot_support_cor
#| fig-cap: "Pairwise Correlations Among Policy Preferences. Data from Pilot Survey."

# This is an exploratory figure, not included in SI, analogous to the pairwise correlation plots included for the main survey. The number of observations per pair is tiny so the correlations aren't very meaningful.

corr_lab_size <- 4 

# coalesce CE and CN versions of the same question
D_plot <- pilot$D %>% 
  dplyr::select(starts_with("sup."), pid3.wlean, tenure, want.price, libcon) %>%
  mutate(
      `Party ID (Democratic)` = case_when(
      pid3.wlean == "dem" ~ 3,
      pid3.wlean == "io" ~ 2,
      pid3.wlean == "rep" ~ 1
    ),
    `Tenure (Renter)` = case_when(
      tenure == "Renter" ~ 1,
      tenure == "Owner" ~ 0,
    ),
    `Wants Lower Hous. Prices` = case_when(
      want.price == "Lower" ~ 3,
      want.price == "Same" ~ 2,
      want.price == "Higher" ~ 1,
    )
  ) %>%
  rename(`Ideology (Liberal)` = libcon) %>%
  dplyr::select(-c(pid3.wlean, tenure, want.price)) %>% # non-numeric columns
  rowwise() %>%
  mutate( # coalesce the CN and CE versions of the same question
    sup.build.sfh = coalesce(sup.build.sfh.CE, sup.build.sfh.CN),
    sup.demo.ban = coalesce(sup.demo.ban.CE, sup.demo.ban.CN),
    sup.demo.replace = coalesce(sup.demo.replace.CE, sup.demo.replace.CN),
    sup.dev.approval.env = coalesce(sup.dev.approval.env.CE, sup.dev.approval.env.CN),
    sup.dev.approval.fast = coalesce(sup.dev.approval.fast.CE, sup.dev.approval.fast.CN),
    sup.dev.approval.rule = coalesce(sup.dev.approval.rule.CE, sup.dev.approval.rule.CN), 
    sup.IZ.low.CE = mean(c(sup.IZ.low.10.CE, sup.IZ.low.50.CE), na.rm = TRUE), # averaging these rowwise as shorthand
    sup.IZ.low = coalesce(sup.IZ.low.CE, sup.IZ.low.CN),
    sup.IZ.mid.CE = mean(c(sup.IZ.mid.10.CE, sup.IZ.mid.50.CE), na.rm = TRUE), # averaging these rowwise as shorthand
    sup.IZ.mid = coalesce(sup.IZ.mid.CE, sup.IZ.mid.CN),
    sup.prop.tax.control = coalesce(sup.prop.tax.control.02.CE, sup.prop.tax.control.CN),
    sup.rent.control.CE = mean(c(sup.rent.control.02.CE, sup.rent.control.vc.CE), na.rm = TRUE), # averaging these rowwise as shorthand
    sup.rent.control = coalesce(sup.rent.control.CE, sup.rent.control.CN)
  ) %>%
  dplyr::select(-c( # remove all the columns that were coalesced or averaged
    sup.build.sfh.CE, sup.build.sfh.CN,
    sup.demo.ban.CE, sup.demo.ban.CN,
    sup.demo.replace.CE, sup.demo.replace.CN,
    sup.dev.approval.env.CE, sup.dev.approval.env.CN,
    sup.dev.approval.fast.CE, sup.dev.approval.fast.CN,
    sup.dev.approval.rule.CE, sup.dev.approval.rule.CN,
    sup.dev.approval.rule.CE, sup.dev.approval.rule.CN,
    sup.IZ.low.10.CE, sup.IZ.low.50.CE,
    sup.IZ.low.CE, sup.IZ.low.CN,
    sup.IZ.mid.10.CE, sup.IZ.mid.50.CE,
    sup.IZ.mid.CE, sup.IZ.mid.CN,
    sup.prop.tax.control.02.CE, sup.prop.tax.control.CN,
    sup.rent.control.02.CE, sup.rent.control.vc.CE,
    sup.rent.control.CE, sup.rent.control.CN
  )
  ) %>%
  relocate(
    sup.build.sfh,
    sup.build.apt,
    sup.ADU,
    sup.upzone.sfh.CE,
    sup.upzone.apt.CE,
    sup.pub.housing,
    sup.land.bank,
    sup.rent.control,
    sup.vouchers,
    sup.renter.break,
    sup.free.loan,
    sup.ll.vacancy.tax,
    sup.ho.vacancy.tax,
    sup.tax.flip,
    sup.IZ.low,
    sup.IZ.mid,
    sup.demo.replace,
    sup.demo.ban,
    sup.prop.tax.control,
    sup.dev.approval.rule,
    sup.dev.approval.fast,
    sup.dev.approval.env,
    sup.less.env.reg,
    sup.fee.removal,
    sup.bus.hous,
    sup.com.link.fee.CE,
    sup.ban.airbnb,
    sup.ban.foreign,
    sup.ban.corp,
    sup.border.wall,
    sup.medicare.all,
    sup.high.min.wage,
    sup.low.gas.tax,
    `Tenure (Renter)`,
    `Wants Lower Hous. Prices`,
    `Party ID (Democratic)`,
    `Ideology (Liberal)`
  )
    
pmat <- D_plot %>% cor_pmat() 

D_plot %>%
  cor(use="pairwise.complete.obs") %>% 
  ggcorrplot(type="lower",
             lab=T, 
  #           p.mat=pmat,
             insig="pch", 
             lab_size=corr_lab_size) +
  labs(title = "",
       subtitle = "") + 
  theme(legend.position="none", 
        text=element_text(size=(0.4*main_text)), 
        axis.text.x = element_text(size=(0.6*axis_text)),
        axis.text.y = element_text(size=(0.6*axis_text))
        )

ggsave(path=here::here("figures","PIPE_paper"), file="fig_pilot_corplot_policy_prefs.pdf", height=16, width=16)

#count number of observations per correlation coefficient
cor_obs <- psych::pairwiseCount(D_plot, diagonal = FALSE)  
cor_obs <- cor_obs[1:33,1:33] # drop observations on respondent characteristics (party ID, etc)
quantile(cor_obs, na.rm = T) # 25th, 75th are 23, 45. it's no surprise we can't make heads or tails out of this matrix.

```

# References
